From 856fe58216e15f06f24fcd4b39aeee09c3b3ab43 Mon Sep 17 00:00:00 2001 From: iuri aranda Date: Thu, 27 Feb 2020 12:15:23 +0100 Subject: [PATCH 1/2] Make FS space alerts thresholds configurable (#1) This makes it possible to tweak the thresholds for the NodeFilesystemSpaceFillingUp alerts. This might be necessary in systems like Kubernetes, where the image garbage collector runs at 85% disk usage, so it is not a problem for the disk to reach that usage level. Signed-off-by: iuri aranda --- docs/node-mixin/alerts/alerts.libsonnet | 4 ++-- docs/node-mixin/config.libsonnet | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/node-mixin/alerts/alerts.libsonnet b/docs/node-mixin/alerts/alerts.libsonnet index 0cdc16e201..b95b1c5894 100644 --- a/docs/node-mixin/alerts/alerts.libsonnet +++ b/docs/node-mixin/alerts/alerts.libsonnet @@ -8,7 +8,7 @@ alert: 'NodeFilesystemSpaceFillingUp', expr: ||| ( - node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 40 + node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceFillingUpWarningThreshold)d and predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 24*60*60) < 0 and @@ -28,7 +28,7 @@ alert: 'NodeFilesystemSpaceFillingUp', expr: ||| ( - node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 20 + node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceFillingUpCriticalThreshold)d and predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 4*60*60) < 0 and diff --git a/docs/node-mixin/config.libsonnet b/docs/node-mixin/config.libsonnet index fdea71d218..c195b3533d 
100644 --- a/docs/node-mixin/config.libsonnet +++ b/docs/node-mixin/config.libsonnet @@ -35,6 +35,16 @@ // just a warning for K8s nodes. nodeCriticalSeverity: 'critical', + // Available disk space (%) thresholds on which to trigger the + // 'NodeFilesystemSpaceFillingUp' alerts. In some cases you'll want + // to adjust these, e.g. by default Kubernetes runs the image + // garbage collection when the disk usage reaches 85% of its + // available space. In that case, you'll want to reduce the + // critical threshold below to something like 14 or 15, otherwise + // the alert could fire under normal node usage. + fsSpaceFillingUpWarningThreshold: 40, + fsSpaceFillingUpCriticalThreshold: 20, + grafana_prefix: '', }, } From 21264bc4eac8d6a6a227fe2b2f52d8fdb2be5506 Mon Sep 17 00:00:00 2001 From: iuri aranda Date: Mon, 2 Mar 2020 15:09:07 +0100 Subject: [PATCH 2/2] Adjust fs alert threshold descriptions Signed-off-by: iuri aranda --- docs/node-mixin/config.libsonnet | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/node-mixin/config.libsonnet b/docs/node-mixin/config.libsonnet index c195b3533d..c06252cba5 100644 --- a/docs/node-mixin/config.libsonnet +++ b/docs/node-mixin/config.libsonnet @@ -36,10 +36,12 @@ nodeCriticalSeverity: 'critical', // Available disk space (%) thresholds on which to trigger the - // 'NodeFilesystemSpaceFillingUp' alerts. In some cases you'll want - // to adjust these, e.g. by default Kubernetes runs the image - // garbage collection when the disk usage reaches 85% of its - // available space. In that case, you'll want to reduce the + // 'NodeFilesystemSpaceFillingUp' alerts. These alerts fire if the + // available space is predicted to run out within 1d (warning) or 4h + // (critical) and is already below the corresponding threshold. + // In some cases you'll want to adjust these, e.g. by default Kubernetes + // runs the image garbage collection when the disk usage reaches 85% + // of its available space. 
In that case, you'll want to reduce the // critical threshold below to something like 14 or 15, otherwise // the alert could fire under normal node usage. fsSpaceFillingUpWarningThreshold: 40,