diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml
index 662ea3b6ee..136d7d4b96 100644
--- a/.github/workflows/golangci-lint.yml
+++ b/.github/workflows/golangci-lint.yml
@@ -21,6 +21,9 @@ jobs:
uses: actions/setup-go@v2
with:
go-version: 1.18.x
+ - name: Install snmp_exporter/generator dependencies
+ run: sudo apt-get update && sudo apt-get -y install libsnmp-dev
+ if: github.repository == 'prometheus/snmp_exporter'
- name: Lint
uses: golangci/golangci-lint-action@v3.1.0
with:
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index 9a1aff4127..d325872bdf 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -1,3 +1,3 @@
-## Prometheus Community Code of Conduct
+# Prometheus Community Code of Conduct
-Prometheus follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md).
+Prometheus follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md).
diff --git a/SECURITY.md b/SECURITY.md
index 67741f015a..fed02d85c7 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -3,4 +3,4 @@
The Prometheus security policy, including how to report vulnerabilities, can be
found here:
-https://prometheus.io/docs/operating/security/
+<https://prometheus.io/docs/operating/security/>
diff --git a/collector/arp_linux.go b/collector/arp_linux.go
index 7592057d93..84216fe975 100644
--- a/collector/arp_linux.go
+++ b/collector/arp_linux.go
@@ -34,7 +34,7 @@ var (
)
type arpCollector struct {
- deviceFilter netDevFilter
+ deviceFilter devFilter
entries *prometheus.Desc
logger log.Logger
}
@@ -46,7 +46,7 @@ func init() {
// NewARPCollector returns a new Collector exposing ARP stats.
func NewARPCollector(logger log.Logger) (Collector, error) {
return &arpCollector{
- deviceFilter: newNetDevFilter(*arpDeviceExclude, *arpDeviceInclude),
+ deviceFilter: newdevFilter(*arpDeviceExclude, *arpDeviceInclude),
entries: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "arp", "entries"),
"ARP entries by device",
diff --git a/collector/diskstats_linux.go b/collector/diskstats_linux.go
index f0de40776b..41800996ec 100644
--- a/collector/diskstats_linux.go
+++ b/collector/diskstats_linux.go
@@ -17,9 +17,8 @@
package collector
import (
+ "errors"
"fmt"
- "regexp"
-
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
@@ -36,7 +35,9 @@ const (
)
var (
- ignoredDevices = kingpin.Flag("collector.diskstats.ignored-devices", "Regexp of devices to ignore for diskstats.").Default("^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$").String()
+ diskstatsDeviceExclude = kingpin.Flag("collector.diskstats.device-exclude", "Regexp of diskstats devices to exclude (mutually exclusive to device-include).").String()
+ // The deprecated flag deliberately has no default, so a non-empty value
+ // always means the user passed it on the command line.
+ oldDiskstatsDeviceExclude = kingpin.Flag("collector.diskstats.ignored-devices", "DEPRECATED: Use collector.diskstats.device-exclude").Hidden().String()
+ diskstatsDeviceInclude = kingpin.Flag("collector.diskstats.device-include", "Regexp of diskstats devices to include (mutually exclusive to device-exclude).").String()
)
+
+// diskstatsDefaultDeviceExclude is the exclude pattern applied when none of the
+// diskstats device flags is given. It matches names such as ram0, loop0, sda1
+// and nvme0n1p1.
+const diskstatsDefaultDeviceExclude = "^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$"
type typedFactorDesc struct {
@@ -49,11 +50,11 @@ func (d *typedFactorDesc) mustNewConstMetric(value float64, labels ...string) pr
}
type diskstatsCollector struct {
- ignoredDevicesPattern *regexp.Regexp
- fs blockdevice.FS
- infoDesc typedFactorDesc
- descs []typedFactorDesc
- logger log.Logger
+ deviceFilter devFilter
+ fs blockdevice.FS
+ infoDesc typedFactorDesc
+ descs []typedFactorDesc
+ logger log.Logger
}
func init() {
@@ -68,10 +69,30 @@ func NewDiskstatsCollector(logger log.Logger) (Collector, error) {
if err != nil {
return nil, fmt.Errorf("failed to open sysfs: %w", err)
}
+ // Map the deprecated flag onto its replacement; giving both is an error.
+ if *oldDiskstatsDeviceExclude != "" {
+ if *diskstatsDeviceExclude != "" {
+ return nil, errors.New("--collector.diskstats.ignored-devices and --collector.diskstats.device-exclude are mutually exclusive")
+ }
+ level.Warn(logger).Log("msg", "--collector.diskstats.ignored-devices is DEPRECATED and will be removed in 2.0.0, use --collector.diskstats.device-exclude")
+ *diskstatsDeviceExclude = *oldDiskstatsDeviceExclude
+ }
+
+ if *diskstatsDeviceExclude != "" && *diskstatsDeviceInclude != "" {
+ return nil, errors.New("device-exclude & device-include are mutually exclusive")
+ }
+
+ // Apply the built-in pattern only when the user selected no devices at all.
+ // Doing it here, after the checks above, keeps the default from colliding
+ // with an explicit device-exclude or device-include.
+ if *diskstatsDeviceExclude == "" && *diskstatsDeviceInclude == "" {
+ *diskstatsDeviceExclude = diskstatsDefaultDeviceExclude
+ }
+
+ if *diskstatsDeviceExclude != "" {
+ level.Info(logger).Log("msg", "Parsed flag --collector.diskstats.device-exclude", "flag", *diskstatsDeviceExclude)
+ }
+
+ if *diskstatsDeviceInclude != "" {
+ level.Info(logger).Log("msg", "Parsed Flag --collector.diskstats.device-include", "flag", *diskstatsDeviceInclude)
+ }
return &diskstatsCollector{
- ignoredDevicesPattern: regexp.MustCompile(*ignoredDevices),
- fs: fs,
+ deviceFilter: newdevFilter(*diskstatsDeviceExclude, *diskstatsDeviceInclude),
+ fs: fs,
infoDesc: typedFactorDesc{
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "info"),
"Info of /sys/block/.",
@@ -194,11 +215,9 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
for _, stats := range diskStats {
dev := stats.DeviceName
- if c.ignoredDevicesPattern.MatchString(dev) {
- level.Debug(c.logger).Log("msg", "Ignoring device", "device", dev, "pattern", c.ignoredDevicesPattern)
+ if c.deviceFilter.ignored(dev) {
continue
}
-
ch <- c.infoDesc.mustNewConstMetric(1.0, dev, fmt.Sprint(stats.MajorNumber), fmt.Sprint(stats.MinorNumber))
statCount := stats.IoStatsCount - 3 // Total diskstats record count, less MajorNumber, MinorNumber and DeviceName
@@ -227,6 +246,7 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
}
ch <- c.descs[i].mustNewConstMetric(val, dev)
}
+
}
return nil
}
diff --git a/collector/ethtool_linux.go b/collector/ethtool_linux.go
index 04a7260d2d..9f5193620e 100644
--- a/collector/ethtool_linux.go
+++ b/collector/ethtool_linux.go
@@ -76,7 +76,7 @@ type ethtoolCollector struct {
entries map[string]*prometheus.Desc
entriesMutex sync.Mutex
ethtool Ethtool
- deviceFilter netDevFilter
+ deviceFilter devFilter
infoDesc *prometheus.Desc
metricsPattern *regexp.Regexp
logger log.Logger
@@ -100,7 +100,7 @@ func makeEthtoolCollector(logger log.Logger) (*ethtoolCollector, error) {
return &ethtoolCollector{
fs: fs,
ethtool: &ethtoolLibrary{e},
- deviceFilter: newNetDevFilter(*ethtoolDeviceExclude, *ethtoolDeviceInclude),
+ deviceFilter: newdevFilter(*ethtoolDeviceExclude, *ethtoolDeviceInclude),
metricsPattern: regexp.MustCompile(*ethtoolIncludedMetrics),
logger: logger,
entries: map[string]*prometheus.Desc{
diff --git a/collector/netdev_bsd.go b/collector/netdev_bsd.go
index d472b2dfeb..41baa9e4f7 100644
--- a/collector/netdev_bsd.go
+++ b/collector/netdev_bsd.go
@@ -34,7 +34,7 @@ import (
*/
import "C"
-func getNetDevStats(filter *netDevFilter, logger log.Logger) (netDevStats, error) {
+func getNetDevStats(filter *devFilter, logger log.Logger) (netDevStats, error) {
netDev := netDevStats{}
var ifap, ifa *C.struct_ifaddrs
diff --git a/collector/netdev_common.go b/collector/netdev_common.go
index 595b34b464..7bedc415f3 100644
--- a/collector/netdev_common.go
+++ b/collector/netdev_common.go
@@ -86,7 +86,7 @@ func NewNetDevCollector(logger log.Logger) (Collector, error) {
return &netDevCollector{
subsystem: "network",
- deviceFilter: newNetDevFilter(*netdevDeviceExclude, *netdevDeviceInclude),
+ deviceFilter: newdevFilter(*netdevDeviceExclude, *netdevDeviceInclude),
metricDescs: map[string]*prometheus.Desc{},
logger: logger,
}, nil
diff --git a/collector/netdev_darwin.go b/collector/netdev_darwin.go
index 0f83a60665..b1789d4b43 100644
--- a/collector/netdev_darwin.go
+++ b/collector/netdev_darwin.go
@@ -27,7 +27,7 @@ import (
"golang.org/x/sys/unix"
)
-func getNetDevStats(filter *netDevFilter, logger log.Logger) (netDevStats, error) {
+func getNetDevStats(filter *devFilter, logger log.Logger) (netDevStats, error) {
netDev := netDevStats{}
ifs, err := net.Interfaces()
diff --git a/collector/netdev_filter.go b/collector/netdev_filter.go
index e809c499e3..25ec5b9a1a 100644
--- a/collector/netdev_filter.go
+++ b/collector/netdev_filter.go
@@ -17,12 +17,12 @@ import (
"regexp"
)
-type netDevFilter struct {
+type devFilter struct {
ignorePattern *regexp.Regexp
acceptPattern *regexp.Regexp
}
-func newNetDevFilter(ignoredPattern, acceptPattern string) (f netDevFilter) {
+func newdevFilter(ignoredPattern, acceptPattern string) (f devFilter) {
if ignoredPattern != "" {
f.ignorePattern = regexp.MustCompile(ignoredPattern)
}
@@ -35,7 +35,7 @@ func newNetDevFilter(ignoredPattern, acceptPattern string) (f netDevFilter) {
}
-// ignores returns whether the device should be ignored
+// ignored reports whether the device should be ignored.
-func (f *netDevFilter) ignored(name string) bool {
+func (f *devFilter) ignored(name string) bool {
return ((f.ignorePattern != nil && f.ignorePattern.MatchString(name)) ||
(f.acceptPattern != nil && !f.acceptPattern.MatchString(name)))
}
diff --git a/collector/netdev_filter_test.go b/collector/netdev_filter_test.go
index 13cebbc68a..182364bdb8 100644
--- a/collector/netdev_filter_test.go
+++ b/collector/netdev_filter_test.go
@@ -17,7 +17,7 @@ import (
"testing"
)
-func TestNetDevFilter(t *testing.T) {
+// TestDevFilter runs devFilter.ignored over a table of ignore/accept patterns
+// and device names. The letter after "Test" must be upper case, otherwise
+// go test does not treat the function as a test.
+func TestDevFilter(t *testing.T) {
tests := []struct {
ignore string
accept string
@@ -33,7 +33,7 @@ func TestNetDevFilter(t *testing.T) {
}
for _, test := range tests {
- filter := newNetDevFilter(test.ignore, test.accept)
+ filter := newdevFilter(test.ignore, test.accept)
result := filter.ignored(test.name)
if result != test.expectedResult {
diff --git a/collector/netdev_linux.go b/collector/netdev_linux.go
index e825db0855..88fa575af6 100644
--- a/collector/netdev_linux.go
+++ b/collector/netdev_linux.go
@@ -34,7 +34,7 @@ var (
procNetDevFieldSep = regexp.MustCompile(` +`)
)
-func getNetDevStats(filter *netDevFilter, logger log.Logger) (netDevStats, error) {
+func getNetDevStats(filter *devFilter, logger log.Logger) (netDevStats, error) {
file, err := os.Open(procFilePath("net/dev"))
if err != nil {
return nil, err
@@ -44,7 +44,7 @@ func getNetDevStats(filter *netDevFilter, logger log.Logger) (netDevStats, error
return parseNetDevStats(file, filter, logger)
}
-func parseNetDevStats(r io.Reader, filter *netDevFilter, logger log.Logger) (netDevStats, error) {
+func parseNetDevStats(r io.Reader, filter *devFilter, logger log.Logger) (netDevStats, error) {
scanner := bufio.NewScanner(r)
scanner.Scan() // skip first header
scanner.Scan()
diff --git a/collector/netdev_linux_test.go b/collector/netdev_linux_test.go
index 8e227a2463..5032fd1e3b 100644
--- a/collector/netdev_linux_test.go
+++ b/collector/netdev_linux_test.go
@@ -27,7 +27,7 @@ func TestNetDevStatsIgnore(t *testing.T) {
}
defer file.Close()
- filter := newNetDevFilter("^veth", "")
+ filter := newdevFilter("^veth", "")
netStats, err := parseNetDevStats(file, &filter, log.NewNopLogger())
if err != nil {
@@ -70,7 +70,7 @@ func TestNetDevStatsAccept(t *testing.T) {
}
defer file.Close()
- filter := newNetDevFilter("", "^💩0$")
+ filter := newdevFilter("", "^💩0$")
netStats, err := parseNetDevStats(file, &filter, log.NewNopLogger())
if err != nil {
t.Fatal(err)
diff --git a/collector/netdev_openbsd.go b/collector/netdev_openbsd.go
index 2be10a3d6c..eb1f472c72 100644
--- a/collector/netdev_openbsd.go
+++ b/collector/netdev_openbsd.go
@@ -31,7 +31,7 @@ import (
*/
import "C"
-func getNetDevStats(filter *netDevFilter, logger log.Logger) (netDevStats, error) {
+func getNetDevStats(filter *devFilter, logger log.Logger) (netDevStats, error) {
netDev := netDevStats{}
var ifap, ifa *C.struct_ifaddrs
diff --git a/collector/netdev_openbsd_amd64.go b/collector/netdev_openbsd_amd64.go
index 8b2bfa0e73..f5ada40cbe 100644
--- a/collector/netdev_openbsd_amd64.go
+++ b/collector/netdev_openbsd_amd64.go
@@ -24,7 +24,7 @@ import (
"unsafe"
)
-func getNetDevStats(filter *netDevFilter, logger log.Logger) (netDevStats, error) {
+func getNetDevStats(filter *devFilter, logger log.Logger) (netDevStats, error) {
netDev := netDevStats{}
mib := [6]_C_int{unix.CTL_NET, unix.AF_ROUTE, 0, 0, unix.NET_RT_IFLIST, 0}
diff --git a/docs/node-mixin/dashboards/node.libsonnet b/docs/node-mixin/dashboards/node.libsonnet
index 57e585b8bc..898c912d56 100644
--- a/docs/node-mixin/dashboards/node.libsonnet
+++ b/docs/node-mixin/dashboards/node.libsonnet
@@ -1,256 +1,7 @@
-local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
-local dashboard = grafana.dashboard;
-local row = grafana.row;
-local prometheus = grafana.prometheus;
-local template = grafana.template;
-local graphPanel = grafana.graphPanel;
-local promgrafonnet = import 'github.com/kubernetes-monitoring/kubernetes-mixin/lib/promgrafonnet/promgrafonnet.libsonnet';
-local gauge = promgrafonnet.gauge;
-
{
+ local nodemixin = import '../lib/prom-mixin.libsonnet',
grafanaDashboards+:: {
- 'nodes.json':
- local idleCPU =
- graphPanel.new(
- 'CPU Usage',
- datasource='$datasource',
- span=6,
- format='percentunit',
- max=1,
- min=0,
- stack=true,
- )
- .addTarget(prometheus.target(
- |||
- (
- (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance"}[$__rate_interval])))
- / ignoring(cpu) group_left
- count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"})
- )
- ||| % $._config,
- legendFormat='{{cpu}}',
- intervalFactor=5,
- ));
-
- local systemLoad =
- graphPanel.new(
- 'Load Average',
- datasource='$datasource',
- span=6,
- format='short',
- min=0,
- fill=0,
- )
- .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='1m load average'))
- .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='5m load average'))
- .addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='15m load average'))
- .addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", mode="idle"})' % $._config, legendFormat='logical cores'));
-
- local memoryGraph =
- graphPanel.new(
- 'Memory Usage',
- datasource='$datasource',
- span=9,
- format='bytes',
- stack=true,
- min=0,
- )
- .addTarget(prometheus.target(
- |||
- (
- node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}
- -
- node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}
- -
- node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}
- -
- node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}
- )
- ||| % $._config, legendFormat='memory used'
- ))
- .addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory buffers'))
- .addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory cached'))
- .addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory free'));
-
- // TODO: It would be nicer to have a gauge that gets a 0-1 range and displays it as a percentage 0%-100%.
- // This needs to be added upstream in the promgrafonnet library and then changed here.
- // NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout.
- local memoryGauge = gauge.new(
- 'Memory Usage',
- |||
- 100 -
- (
- avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"})
- /
- avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"})
- * 100
- )
- ||| % $._config,
- ).withLowerBeingBetter();
-
- local diskIO =
- graphPanel.new(
- 'Disk I/O',
- datasource='$datasource',
- span=6,
- min=0,
- fill=0,
- )
- // TODO: Does it make sense to have those three in the same panel?
- .addTarget(prometheus.target(
- 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % $._config,
- legendFormat='{{device}} read',
- ))
- .addTarget(prometheus.target(
- 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % $._config,
- legendFormat='{{device}} written',
- ))
- .addTarget(prometheus.target(
- 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % $._config,
- legendFormat='{{device}} io time',
- )) +
- {
- seriesOverrides: [
- {
- alias: '/ read| written/',
- yaxis: 1,
- },
- {
- alias: '/ io time/',
- yaxis: 2,
- },
- ],
- yaxes: [
- self.yaxe(format='bytes'),
- self.yaxe(format='s'),
- ],
- };
-
- // TODO: Somehow partition this by device while excluding read-only devices.
- local diskSpaceUsage =
- graphPanel.new(
- 'Disk Space Usage',
- datasource='$datasource',
- span=6,
- format='bytes',
- min=0,
- fill=1,
- stack=true,
- )
- .addTarget(prometheus.target(
- |||
- sum(
- max by (device) (
- node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s}
- -
- node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s}
- )
- )
- ||| % $._config,
- legendFormat='used',
- ))
- .addTarget(prometheus.target(
- |||
- sum(
- max by (device) (
- node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s}
- )
- )
- ||| % $._config,
- legendFormat='available',
- )) +
- {
- seriesOverrides: [
- {
- alias: 'used',
- color: '#E0B400',
- },
- {
- alias: 'available',
- color: '#73BF69',
- },
- ],
- };
-
- local networkReceived =
- graphPanel.new(
- 'Network Received',
- datasource='$datasource',
- span=6,
- format='bytes',
- min=0,
- fill=0,
- )
- .addTarget(prometheus.target(
- 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval])' % $._config,
- legendFormat='{{device}}',
- ));
-
- local networkTransmitted =
- graphPanel.new(
- 'Network Transmitted',
- datasource='$datasource',
- span=6,
- format='bytes',
- min=0,
- fill=0,
- )
- .addTarget(prometheus.target(
- 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval])' % $._config,
- legendFormat='{{device}}',
- ));
-
- dashboard.new(
- '%sNodes' % $._config.dashboardNamePrefix,
- time_from='now-1h',
- tags=($._config.dashboardTags),
- timezone='utc',
- refresh='30s',
- graphTooltip='shared_crosshair'
- )
- .addTemplate(
- {
- current: {
- text: 'default',
- value: 'default',
- },
- hide: 0,
- label: 'Data Source',
- name: 'datasource',
- options: [],
- query: 'prometheus',
- refresh: 1,
- regex: '',
- type: 'datasource',
- },
- )
- .addTemplate(
- template.new(
- 'instance',
- '$datasource',
- 'label_values(node_exporter_build_info{%(nodeExporterSelector)s}, instance)' % $._config,
- refresh='time',
- )
- )
- .addRow(
- row.new()
- .addPanel(idleCPU)
- .addPanel(systemLoad)
- )
- .addRow(
- row.new()
- .addPanel(memoryGraph)
- .addPanel(memoryGauge)
- )
- .addRow(
- row.new()
- .addPanel(diskIO)
- .addPanel(diskSpaceUsage)
- )
- .addRow(
- row.new()
- .addPanel(networkReceived)
- .addPanel(networkTransmitted)
- ),
+ 'nodes.json': nodemixin.new(config=$._config, platform='Linux').dashboard,
+ 'nodes-darwin.json': nodemixin.new(config=$._config, platform='Darwin').dashboard,
},
}
diff --git a/docs/node-mixin/jsonnetfile.json b/docs/node-mixin/jsonnetfile.json
index 46ebffe47c..721d4833a0 100644
--- a/docs/node-mixin/jsonnetfile.json
+++ b/docs/node-mixin/jsonnetfile.json
@@ -13,17 +13,8 @@
{
"source": {
"git": {
- "remote": "https://github.com/grafana/jsonnet-libs.git",
- "subdir": "grafana-builder"
- }
- },
- "version": "master"
- },
- {
- "source": {
- "git": {
- "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git",
- "subdir": "lib/promgrafonnet"
+ "remote": "https://github.com/grafana/grafonnet-lib.git",
+ "subdir": "grafonnet-7.0"
}
},
"version": "master"
diff --git a/docs/node-mixin/lib/prom-mixin.libsonnet b/docs/node-mixin/lib/prom-mixin.libsonnet
new file mode 100644
index 0000000000..e545651d08
--- /dev/null
+++ b/docs/node-mixin/lib/prom-mixin.libsonnet
@@ -0,0 +1,353 @@
+local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
+local dashboard = grafana.dashboard;
+local row = grafana.row;
+local prometheus = grafana.prometheus;
+local template = grafana.template;
+local graphPanel = grafana.graphPanel;
+local grafana70 = import 'github.com/grafana/grafonnet-lib/grafonnet-7.0/grafana.libsonnet';
+local gaugePanel = grafana70.panel.gauge;
+
+{
+
+ new(config=null, platform=null):: {
+
+ local prometheusDatasourceTemplate = {
+ current: {
+ text: 'default',
+ value: 'default',
+ },
+ hide: 0,
+ label: 'Data Source',
+ name: 'datasource',
+ options: [],
+ query: 'prometheus',
+ refresh: 1,
+ regex: '',
+ type: 'datasource',
+ },
+
+ local instanceTemplatePrototype =
+ template.new(
+ 'instance',
+ '$datasource',
+ '',
+ refresh='time',
+ label='Instance',
+ ),
+ local instanceTemplate =
+ if platform == 'Darwin' then
+ instanceTemplatePrototype
+ { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, instance)' % config }
+ else
+ instanceTemplatePrototype
+ { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, instance)' % config },
+
+
+ local idleCPU =
+ graphPanel.new(
+ 'CPU Usage',
+ datasource='$datasource',
+ span=6,
+ format='percentunit',
+ max=1,
+ min=0,
+ stack=true,
+ )
+ .addTarget(prometheus.target(
+ |||
+ (
+ (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance"}[$__rate_interval])))
+ / ignoring(cpu) group_left
+ count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"})
+ )
+ ||| % config,
+ legendFormat='{{cpu}}',
+ intervalFactor=5,
+ )),
+
+ local systemLoad =
+ graphPanel.new(
+ 'Load Average',
+ datasource='$datasource',
+ span=6,
+ format='short',
+ min=0,
+ fill=0,
+ )
+ .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='1m load average'))
+ .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='5m load average'))
+ .addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='15m load average'))
+ .addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", mode="idle"})' % config, legendFormat='logical cores')),
+
+ local memoryGraphPanelPrototype =
+ graphPanel.new(
+ 'Memory Usage',
+ datasource='$datasource',
+ span=9,
+ format='bytes',
+ min=0,
+ ),
+ local memoryGraph =
+ if platform == 'Linux' then
+ memoryGraphPanelPrototype { stack: true }
+ .addTarget(prometheus.target(
+ |||
+ (
+ node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}
+ -
+ node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}
+ -
+ node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}
+ -
+ node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}
+ )
+ ||| % config,
+ legendFormat='memory used'
+ ))
+ .addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory buffers'))
+ .addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory cached'))
+ .addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory free'))
+ else if platform == 'Darwin' then
+ // not useful to stack
+ memoryGraphPanelPrototype { stack: false }
+ .addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Physical Memory'))
+ .addTarget(prometheus.target(
+ |||
+ (
+ node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"} -
+ node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"} +
+ node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"} +
+ node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"}
+ )
+ ||| % config, legendFormat='Memory Used'
+ ))
+ .addTarget(prometheus.target(
+ |||
+ (
+ node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"} -
+ node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"}
+ )
+ ||| % config, legendFormat='App Memory'
+ ))
+ .addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Wired Memory'))
+ .addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Compressed')),
+
+ // NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout.
+ local memoryGaugePanelPrototype =
+ gaugePanel.new(
+ title='Memory Usage',
+ datasource='$datasource',
+ )
+ .addThresholdStep('rgba(50, 172, 45, 0.97)')
+ .addThresholdStep('rgba(237, 129, 40, 0.89)', 80)
+ .addThresholdStep('rgba(245, 54, 54, 0.9)', 90)
+ .setFieldConfig(max=100, min=0, unit='percent')
+ + {
+ span: 3,
+ },
+
+ local memoryGauge =
+ if platform == 'Linux' then
+ memoryGaugePanelPrototype
+
+ .addTarget(prometheus.target(
+ |||
+ 100 -
+ (
+ avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"}) /
+ avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"})
+ * 100
+ )
+ ||| % config,
+ ))
+
+ else if platform == 'Darwin' then
+ memoryGaugePanelPrototype
+ .addTarget(prometheus.target(
+ |||
+ (
+ (
+ avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"}) -
+ avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"}) +
+ avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"}) +
+ avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"})
+ ) /
+ avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"})
+ )
+ *
+ 100
+ ||| % config
+ )),
+
+ local diskIO =
+ graphPanel.new(
+ 'Disk I/O',
+ datasource='$datasource',
+ span=6,
+ min=0,
+ fill=0,
+ )
+ // TODO: Does it make sense to have those three in the same panel?
+ .addTarget(prometheus.target(
+ 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config,
+ legendFormat='{{device}} read',
+ ))
+ .addTarget(prometheus.target(
+ 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config,
+ legendFormat='{{device}} written',
+ ))
+ .addTarget(prometheus.target(
+ 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config,
+ legendFormat='{{device}} io time',
+ )) +
+ {
+ seriesOverrides: [
+ {
+ alias: '/ read| written/',
+ yaxis: 1,
+ },
+ {
+ alias: '/ io time/',
+ yaxis: 2,
+ },
+ ],
+ yaxes: [
+ self.yaxe(format='bytes'),
+ self.yaxe(format='s'),
+ ],
+ },
+
+ // TODO: Somehow partition this by device while excluding read-only devices.
+ local diskSpaceUsage =
+ graphPanel.new(
+ 'Disk Space Usage',
+ datasource='$datasource',
+ span=6,
+ format='bytes',
+ min=0,
+ fill=1,
+ stack=true,
+ )
+ .addTarget(prometheus.target(
+ |||
+ sum(
+ max by (device) (
+ node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s}
+ -
+ node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s}
+ )
+ )
+ ||| % config,
+ legendFormat='used',
+ ))
+ .addTarget(prometheus.target(
+ |||
+ sum(
+ max by (device) (
+ node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s}
+ )
+ )
+ ||| % config,
+ legendFormat='available',
+ )) +
+ {
+ seriesOverrides: [
+ {
+ alias: 'used',
+ color: '#E0B400',
+ },
+ {
+ alias: 'available',
+ color: '#73BF69',
+ },
+ ],
+ },
+
+ local networkReceived =
+ graphPanel.new(
+ 'Network Received',
+ datasource='$datasource',
+ span=6,
+ format='bytes',
+ min=0,
+ fill=0,
+ )
+ .addTarget(prometheus.target(
+ 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval])' % config,
+ legendFormat='{{device}}',
+ )),
+
+ local networkTransmitted =
+ graphPanel.new(
+ 'Network Transmitted',
+ datasource='$datasource',
+ span=6,
+ format='bytes',
+ min=0,
+ fill=0,
+ )
+ .addTarget(prometheus.target(
+ 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval])' % config,
+ legendFormat='{{device}}',
+ )),
+
+ local cpuRow =
+ row.new('CPU')
+ .addPanel(idleCPU)
+ .addPanel(systemLoad),
+
+ local memoryRow =
+ row.new('Memory')
+ .addPanel(memoryGraph)
+ .addPanel(memoryGauge),
+
+ local diskRow =
+ row.new('Disk')
+ .addPanel(diskIO)
+ .addPanel(diskSpaceUsage),
+
+ local networkRow =
+ row.new('Network')
+ .addPanel(networkReceived)
+ .addPanel(networkTransmitted),
+
+ local rows =
+ [
+ cpuRow,
+ memoryRow,
+ diskRow,
+ networkRow,
+ ],
+
+ local templates =
+ [
+ prometheusDatasourceTemplate,
+ instanceTemplate,
+ ],
+
+
+ // The supported platforms differ only in the dashboard title, so choose the
+ // title format first and build the dashboard once.
+ local dashboardTitleFormat = if platform == 'Darwin' then '%sMacOS' else '%sNodes',
+
+ // Evaluates to null for any platform other than 'Linux' or 'Darwin'.
+ dashboard: if platform == 'Linux' || platform == 'Darwin' then
+ dashboard.new(
+ dashboardTitleFormat % config.dashboardNamePrefix,
+ time_from='now-1h',
+ tags=(config.dashboardTags),
+ timezone='utc',
+ refresh='30s',
+ graphTooltip='shared_crosshair'
+ )
+ .addTemplates(templates)
+ .addRows(rows),
+
+ },
+}