From cdfe1f89a64439c3dfe116596f336dd9cae587be Mon Sep 17 00:00:00 2001
From: Iury Gregory Melo Ferreira
Date: Wed, 3 Jul 2019 19:01:12 +0200
Subject: [PATCH] Monitoring for ironic-prometheus-exporter

This commit adds the changes that allow Prometheus to collect data
from the ironic-prometheus-exporter [1] that runs in the ironic-image [2].

- Added a Service for the ironic-prometheus-exporter with the same run
  level as openshift-machine-api.
- Added the ServiceMonitor for the ironic-prometheus-exporter.
- Added a PrometheusRule with alerts for the baremetal_temp_celsius metric.
- Added the ironic-exporter container.

Note: Run level 90 is used for the ServiceMonitor and PrometheusRule
to ensure that the Service and the Prometheus Role and RoleBinding
have already been applied.

[1] https://github.com/metal3-io/ironic-prometheus-exporter
[2] https://github.com/metal3-io/ironic-image
---
 ...10_service-ironic-prometheus-exporter.yaml | 23 ++++++++++++++++
 ...icemonitor-ironic-prometheus-exporter.yaml | 22 +++++++++++++++
 ...theusrules-ironic-prometheus-exporter.yaml | 29 +++++++++++++++++++
 pkg/operator/baremetal_pod.go                 | 18 ++++++++++++
 4 files changed, 92 insertions(+)
 create mode 100644 install/0000_30_machine-api-operator_10_service-ironic-prometheus-exporter.yaml
 create mode 100644 install/0000_90_machine-api-operator_04_servicemonitor-ironic-prometheus-exporter.yaml
 create mode 100644 install/0000_90_machine-api-operator_05_prometheusrules-ironic-prometheus-exporter.yaml

diff --git a/install/0000_30_machine-api-operator_10_service-ironic-prometheus-exporter.yaml b/install/0000_30_machine-api-operator_10_service-ironic-prometheus-exporter.yaml
new file mode 100644
index 0000000000..d73f9676c0
--- /dev/null
+++ b/install/0000_30_machine-api-operator_10_service-ironic-prometheus-exporter.yaml
@@ -0,0 +1,23 @@
+{{if .usingBareMetal}}
+# Headless Service exposing the ironic-prometheus-exporter port 9608 under the name "http".
+apiVersion: v1
+kind: Service
+metadata:
+  name: metal3-baremetalhost-controller
+  namespace: openshift-machine-api
+  labels:
+    app: ironic-exporter
+spec:
+  ports:
+  - name: http
+    protocol: TCP
+    port: 9608
+    targetPort: 9608
+  selector:
+    app: ironic-exporter
+  clusterIP: None
+  type: ClusterIP
+  sessionAffinity: None
+status:
+  loadBalancer: {}
+{{- end}}
diff --git a/install/0000_90_machine-api-operator_04_servicemonitor-ironic-prometheus-exporter.yaml b/install/0000_90_machine-api-operator_04_servicemonitor-ironic-prometheus-exporter.yaml
new file mode 100644
index 0000000000..b9ad20c1f9
--- /dev/null
+++ b/install/0000_90_machine-api-operator_04_servicemonitor-ironic-prometheus-exporter.yaml
@@ -0,0 +1,22 @@
+{{if .usingBareMetal}}
+# Scrapes /metrics from the Service port named "http" in openshift-machine-api.
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  labels:
+    app: ironic-exporter
+  name: metal3-baremetalhost-controller
+  namespace: openshift-machine-api
+spec:
+  endpoints:
+  - port: http
+    scheme: http
+    path: /metrics
+  jobLabel: app
+  namespaceSelector:
+    matchNames:
+    - openshift-machine-api
+  selector:
+    matchLabels:
+      app: ironic-exporter
+{{- end}}
diff --git a/install/0000_90_machine-api-operator_05_prometheusrules-ironic-prometheus-exporter.yaml b/install/0000_90_machine-api-operator_05_prometheusrules-ironic-prometheus-exporter.yaml
new file mode 100644
index 0000000000..64bb1286a8
--- /dev/null
+++ b/install/0000_90_machine-api-operator_05_prometheusrules-ironic-prometheus-exporter.yaml
@@ -0,0 +1,29 @@
+{{if .usingBareMetal}}
+# Alerts on baremetal_temp_celsius. The Prometheus placeholders are wrapped in
+# raw-string actions so that this file's own Go templating emits them verbatim.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: metal3-baremetalhost-controller
+  namespace: openshift-machine-api
+spec:
+  groups:
+  - name: metal3-baremetalhost-controller
+    rules:
+    - alert: HighCPUTemperature
+      annotations:
+        summary: "The baremetal node {{`{{ $labels.node_name }}`}} CPU {{`{{ $labels.entity_id }}`}} is too high"
+        description: "The baremetal node {{`{{ $labels.node_name }}`}} CPU {{`{{ $labels.entity_id }}`}} was too high in the past 5 minutes. Last measurement {{`{{ $value }}`}}"
+      expr: baremetal_temp_celsius > 96
+      for: 5m
+      labels:
+        severity: warning
+    - alert: LowCPUTemperature
+      annotations:
+        summary: "The baremetal node {{`{{ $labels.node_name }}`}} CPU {{`{{ $labels.entity_id }}`}} is too low"
+        description: "The baremetal node {{`{{ $labels.node_name }}`}} CPU {{`{{ $labels.entity_id }}`}} was too low in the past 5 minutes. Last measurement {{`{{ $value }}`}}"
+      expr: baremetal_temp_celsius < 3
+      for: 5m
+      labels:
+        severity: warning
+{{- end}}
diff --git a/pkg/operator/baremetal_pod.go b/pkg/operator/baremetal_pod.go
index 6c68e35ce0..e48af45f1f 100644
--- a/pkg/operator/baremetal_pod.go
+++ b/pkg/operator/baremetal_pod.go
@@ -290,6 +290,7 @@ func newMetal3Containers(config *OperatorConfig, baremetalProvisioningConfig Bar
 	containers = append(containers, createContainerMetal3IronicApi(config, baremetalProvisioningConfig))
 	containers = append(containers, createContainerMetal3IronicInspector(config, baremetalProvisioningConfig))
 	containers = append(containers, createContainerMetal3StaticIpManager(config, baremetalProvisioningConfig))
+	containers = append(containers, createContainerMetal3IronicExporter(config))
 	return containers
 }
 
@@ -424,3 +425,20 @@ func createContainerMetal3StaticIpManager(config *OperatorConfig, baremetalProvi
 	}
 	return container
 }
+
+// createContainerMetal3IronicExporter returns the privileged container that
+// runs /bin/runironic-exporter from the Ironic image referenced by config,
+// mounting the volumeMounts value declared outside this function.
+func createContainerMetal3IronicExporter(config *OperatorConfig) corev1.Container {
+	container := corev1.Container{
+		Name:            "metal3-ironic-exporter",
+		Image:           config.BaremetalControllers.Ironic,
+		ImagePullPolicy: "Always",
+		SecurityContext: &corev1.SecurityContext{
+			Privileged: pointer.BoolPtr(true),
+		},
+		Command:      []string{"/bin/runironic-exporter"},
+		VolumeMounts: volumeMounts,
+	}
+	return container
+}