From 5cdcba60e45ca01f7bfc3420b78245816b416f4b Mon Sep 17 00:00:00 2001 From: Peter Bueschel Date: Mon, 30 Sep 2019 19:39:20 +0200 Subject: [PATCH 1/2] Two new states will be added to the tcpstat collector called rx_queued_bytes and tx_queued_bytes. For UDP datagrams an additional collector 'udp_queues' can be used to expose the total lengths of the tx_queue and rx_queue. @SuperQ and @discordianfish this changes gives us the option to check for overloaded UDP + TCP processing. The names of the new TCP states and the UDP metric can be discussed. The current reasons are just: I don't want to add another collector for the same exposed file, so I just added the new states to the tcpstat collector. I chose the name 'udp_queue' instead of 'udpstat' as UDP has no state. Signed-off-by: Peter Bueschel --- README.md | 1 + collector/fixtures/proc/net/tcpstat | 4 +- collector/fixtures/proc/net/udp | 2 + collector/tcpstat_linux.go | 28 +++++- collector/tcpstat_linux_test.go | 64 +++++++++++- collector/udpqueues_linux.go | 118 ++++++++++++++++++++++ collector/udpqueues_linux_test.go | 151 ++++++++++++++++++++++++++++ 7 files changed, 364 insertions(+), 4 deletions(-) create mode 100644 collector/fixtures/proc/net/udp create mode 100644 collector/udpqueues_linux.go create mode 100644 collector/udpqueues_linux_test.go diff --git a/README.md b/README.md index 7a4b9f407f..d31db6fd01 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,7 @@ runit | Exposes service status from [runit](http://smarden.org/runit/). | _any_ supervisord | Exposes service status from [supervisord](http://supervisord.org/). | _any_ systemd | Exposes service and system status from [systemd](http://www.freedesktop.org/wiki/Software/systemd/). | Linux tcpstat | Exposes TCP connection status information from `/proc/net/tcp` and `/proc/net/tcp6`. (Warning: the current version has potential performance issues in high load situations.) | Linux +udp_queues | Exposes UDP total lengths of the rx_queue and tx_queue from `/proc/net/udp` and `/proc/net/udp6`. | Linux wifi | Exposes WiFi device and station statistics. | Linux perf | Exposes perf based metrics (Warning: Metrics are dependent on kernel configuration and settings). | Linux diff --git a/collector/fixtures/proc/net/tcpstat b/collector/fixtures/proc/net/tcpstat index 8b3777a969..352c00bbf3 100644 --- a/collector/fixtures/proc/net/tcpstat +++ b/collector/fixtures/proc/net/tcpstat @@ -1,3 +1,3 @@ sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode - 0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 2740 1 ffff88003d3af3c0 100 0 0 10 0 - 1: 0F02000A:0016 0202000A:8B6B 01 00000000:00000000 02:000AC99B 00000000 0 0 3652 4 ffff88003d3ae040 21 4 31 47 46 + 0: 00000000:0016 00000000:0000 0A 00000015:00000000 00:00000000 00000000 0 0 2740 1 ffff88003d3af3c0 100 0 0 10 0 + 1: 0F02000A:0016 0202000A:8B6B 01 00000015:00000001 02:000AC99B 00000000 0 0 3652 4 ffff88003d3ae040 21 4 31 47 46 diff --git a/collector/fixtures/proc/net/udp b/collector/fixtures/proc/net/udp new file mode 100644 index 0000000000..3c5052400a --- /dev/null +++ b/collector/fixtures/proc/net/udp @@ -0,0 +1,2 @@ + sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode + 0: 00000000:0016 00000000:0000 0A 00000015:00000000 00:00000000 00000000 0 0 2740 1 ffff88003d3af3c0 100 0 0 10 0 diff --git a/collector/tcpstat_linux.go b/collector/tcpstat_linux.go index cc4e960b0f..5641111cbf 100644 --- a/collector/tcpstat_linux.go +++ b/collector/tcpstat_linux.go @@ -51,6 +51,10 @@ const ( tcpListen // TCP_CLOSING tcpClosing + // TCP_RX_BUFFER + tcpRxQueuedBytes + // TCP_TX_BUFFER + tcpTxQueuedBytes ) type tcpStatCollector struct { @@ -119,16 +123,34 @@ func parseTCPStats(r io.Reader) (map[tcpConnectionState]float64, error) { if len(parts) == 0 { continue } - if len(parts) < 4 { + if len(parts) < 5 { return nil, fmt.Errorf("invalid TCP stats line: %q", line) } + qu := strings.Split(parts[4], ":") + if len(qu) < 2 { + return nil, fmt.Errorf("cannot parse tx_queues and rx_queues: %q", line) + } + + tx, err := strconv.ParseUint(qu[0], 16, 64) + if err != nil { + return nil, err + } + tcpStats[tcpConnectionState(tcpTxQueuedBytes)] += float64(tx) + + rx, err := strconv.ParseUint(qu[1], 16, 64) + if err != nil { + return nil, err + } + tcpStats[tcpConnectionState(tcpRxQueuedBytes)] += float64(rx) + st, err := strconv.ParseInt(parts[3], 16, 8) if err != nil { return nil, err } tcpStats[tcpConnectionState(st)]++ + } return tcpStats, nil @@ -158,6 +180,10 @@ func (st tcpConnectionState) String() string { return "listen" case tcpClosing: return "closing" + case tcpRxQueuedBytes: + return "rx_queued_bytes" + case tcpTxQueuedBytes: + return "tx_queued_bytes" default: return "unknown" } diff --git a/collector/tcpstat_linux_test.go b/collector/tcpstat_linux_test.go index f4c3b36c8c..b609b84679 100644 --- a/collector/tcpstat_linux_test.go +++ b/collector/tcpstat_linux_test.go @@ -28,8 +28,27 @@ func Test_parseTCPStatsError(t *testing.T) { name: "too few fields", in: "sl local_address\n 0: 00000000:0016", }, + { + name: "missing colon in tx-rx field", + in: "sl local_address rem_address st tx_queue rx_queue\n" + + " 1: 0F02000A:0016 0202000A:8B6B 01 0000000000000001", + }, + { + name: "tx parsing issue", + in: "sl local_address rem_address st tx_queue rx_queue\n" + + " 1: 0F02000A:0016 0202000A:8B6B 01 0000000x:00000001", + }, + { + name: "rx parsing issue", + in: "sl local_address rem_address st tx_queue rx_queue\n" + + " 1: 0F02000A:0016 0202000A:8B6B 01 00000000:0000000x", + }, + { + name: "state parsing issue", + in: "sl local_address rem_address st tx_queue rx_queue\n" + + " 1: 0F02000A:0016 0202000A:8B6B 0H 00000000:00000001", + }, } - for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if _, err := parseTCPStats(strings.NewReader(tt.in)); err == nil { @@ -40,6 +59,14 @@ func Test_parseTCPStatsError(t *testing.T) { } func TestTCPStat(t *testing.T) { + + noFile, _ := os.Open("follow the white rabbit") + defer noFile.Close() + + if _, err := parseTCPStats(noFile); err == nil { + t.Fatal("expected an error, but none occurred") + } + file, err := os.Open("fixtures/proc/net/tcpstat") if err != nil { t.Fatal(err) @@ -58,4 +85,39 @@ func TestTCPStat(t *testing.T) { if want, got := 1, int(tcpStats[tcpListen]); want != got { t.Errorf("want tcpstat number of listen state %d, got %d", want, got) } + + if want, got := 42, int(tcpStats[tcpTxQueuedBytes]); want != got { + t.Errorf("want tcpstat number of bytes in tx queue %d, got %d", want, got) + } + if want, got := 1, int(tcpStats[tcpRxQueuedBytes]); want != got { + t.Errorf("want tcpstat number of bytes in rx queue %d, got %d", want, got) + } + +} + +func Test_getTCPStats(t *testing.T) { + type args struct { + statsFile string + } + tests := []struct { + name string + args args + wantErr bool + }{ + { + name: "file not found", + args: args{statsFile: "somewhere over the rainbow"}, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := getTCPStats(tt.args.statsFile) + if (err != nil) != tt.wantErr { + t.Errorf("getTCPStats() error = %v, wantErr %v", err, tt.wantErr) + return + } + // other cases are covered by TestTCPStat() + }) + } } diff --git a/collector/udpqueues_linux.go b/collector/udpqueues_linux.go new file mode 100644 index 0000000000..504fd038f1 --- /dev/null +++ b/collector/udpqueues_linux.go @@ -0,0 +1,118 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !noudp_queues + +package collector + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "strconv" + "strings" + + "github.com/prometheus/client_golang/prometheus" +) + +type udpQueuesCollector struct { + desc typedDesc +} + +func init() { + registerCollector("udp_queues", defaultDisabled, NewUDPqueuesCollector) +} + +// NewUDPqueuesCollector returns a new Collector exposing network udp queued bytes. +func NewUDPqueuesCollector() (Collector, error) { + return &udpQueuesCollector{ + desc: typedDesc{prometheus.NewDesc( + prometheus.BuildFQName(namespace, "udp", "queues"), + "Number of allocated memory in the kernel for UDP datagrams in bytes.", + []string{"queue"}, nil, + ), prometheus.GaugeValue}, + }, nil +} + +func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error { + updQueues, err := getUDPqueues(procFilePath("net/udp")) + if err != nil { + return fmt.Errorf("couldn't get upd queued bytes: %s", err) + } + + // if enabled ipv6 system + udp6File := procFilePath("net/udp6") + if _, hasIPv6 := os.Stat(udp6File); hasIPv6 == nil { + udp6Queues, err := getUDPqueues(udp6File) + if err != nil { + return fmt.Errorf("couldn't get udp6 queued bytes: %s", err) + } + + for qu, value := range udp6Queues { + updQueues[qu] += value + } + } + + for qu, value := range updQueues { + ch <- c.desc.mustNewConstMetric(value, qu) + } + return nil +} + +func getUDPqueues(statsFile string) (map[string]float64, error) { + file, err := os.Open(statsFile) + if err != nil { + return nil, err + } + defer file.Close() + + return parseUDPqueues(file) +} + +func parseUDPqueues(r io.Reader) (map[string]float64, error) { + updQueues := map[string]float64{} + contents, err := ioutil.ReadAll(r) + if err != nil { + return nil, err + } + + for _, line := range strings.Split(string(contents), "\n")[1:] { + fields := strings.Fields(line) + if len(fields) == 0 { + continue + } + if len(fields) < 5 { + return nil, fmt.Errorf("invalid line in file: %q", line) + } + + qu := strings.Split(fields[4], ":") + if len(qu) < 2 { + return nil, fmt.Errorf("cannot parse tx_queues and rx_queues: %q", line) + } + + tx, err := strconv.ParseUint(qu[0], 16, 64) + if err != nil { + return nil, err + } + updQueues["tx_queue"] += float64(tx) + + rx, err := strconv.ParseUint(qu[1], 16, 64) + if err != nil { + return nil, err + } + updQueues["rx_queue"] += float64(rx) + } + + return updQueues, nil +} diff --git a/collector/udpqueues_linux_test.go b/collector/udpqueues_linux_test.go new file mode 100644 index 0000000000..ad584ff132 --- /dev/null +++ b/collector/udpqueues_linux_test.go @@ -0,0 +1,151 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !noudpqueues + +package collector + +import ( + "io" + "os" + "strings" + "testing" +) + +func Test_parseUDPqueues(t *testing.T) { + noFile, _ := os.Open("follow the white rabbit") + defer noFile.Close() + + if _, err := parseUDPqueues(noFile); err == nil { + t.Fatal("expected an error, but none occurred") + } + + type args struct { + r io.Reader + } + tests := []struct { + name string + args args + want map[string]float64 + wantErr bool + }{ + { + name: "reading valid lines, no issue should happend", + args: args{ + strings.NewReader( + "sl local_address rem_address st tx_queue rx_queue \n" + + "1: 00000000:0000 00000000:0000 07 00000000:00000001 \n" + + "2: 00000000:0000 00000000:0000 07 00000002:00000001 \n"), + }, + want: map[string]float64{"tx_queue": 2, "rx_queue": 2}, + wantErr: false, + }, + { + name: "error case - invalid line - number of fields < 5", + args: args{ + strings.NewReader( + "sl local_address rem_address st tx_queue rx_queue \n" + + "1: 00000000:0000 00000000:0000 07 00000000:00000001 \n" + + "2: 00000000:0000 00000000:0000 07 \n"), + }, + want: nil, + wantErr: true, + }, + { + name: "error case - cannot parse line - missing colon", + args: args{ + strings.NewReader( + "sl local_address rem_address st tx_queue rx_queue \n" + + "1: 00000000:0000 00000000:0000 07 00000000:00000001 \n" + + "2: 00000000:0000 00000000:0000 07 0000000200000001 \n"), + }, + want: nil, + wantErr: true, + }, + { + name: "error case - parse tx_queue - not an valid hex", + args: args{ + strings.NewReader( + "sl local_address rem_address st tx_queue rx_queue \n" + + "1: 00000000:0000 00000000:0000 07 0000000G:00000001 \n" + + "2: 00000000:0000 00000000:0000 07 00000002:00000001 \n"), + }, + want: nil, + wantErr: true, + }, + { + name: "error case - parse rx_queue - not an valid hex", + args: args{ + strings.NewReader( + "sl local_address rem_address st tx_queue rx_queue \n" + + "1: 00000000:0000 00000000:0000 07 00000000:00000001 \n" + + "2: 00000000:0000 00000000:0000 07 00000002:0000000G \n"), + }, + want: nil, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseUDPqueues(tt.args.r) + if (err != nil) != tt.wantErr { + t.Errorf("parseUDPqueues() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if len(tt.want) != len(got) { + t.Errorf("parseUDPqueues() = %v, want %v", got, tt.want) + } + for k, v := range tt.want { + if _, ok := got[k]; !ok { + t.Errorf("parseUDPqueues() = %v, want %v", got, tt.want) + } + if got[k] != v { + t.Errorf("parseUDPqueues() = %v, want %v", got, tt.want) + } + } + }) + } +} + +func Test_getUDPqueues(t *testing.T) { + type args struct { + statsFile string + } + tests := []struct { + name string + args args + wantErr bool + }{ + { + name: "file found", + args: args{statsFile: "fixtures/proc/net/udp"}, + wantErr: false, + }, + { + name: "error case - file not found", + args: args{statsFile: "somewhere over the rainbow"}, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := getUDPqueues(tt.args.statsFile) + if (err != nil) != tt.wantErr { + t.Errorf("getUDPqueues() error = %v, wantErr %v", err, tt.wantErr) + return + } + // other cases are covered by Test_getUDPqueues() + }) + } +} From 30ec75d60d804763652a8a727c95c18624877c67 Mon Sep 17 00:00:00 2001 From: peterbueschel Date: Mon, 30 Sep 2019 19:59:29 +0200 Subject: [PATCH 2/2] Update udpqueues_linux_test.go Fix typo. --- collector/udpqueues_linux_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collector/udpqueues_linux_test.go b/collector/udpqueues_linux_test.go index ad584ff132..246553e11b 100644 --- a/collector/udpqueues_linux_test.go +++ b/collector/udpqueues_linux_test.go @@ -40,7 +40,7 @@ func Test_parseUDPqueues(t *testing.T) { wantErr bool }{ { - name: "reading valid lines, no issue should happend", + name: "reading valid lines, no issue should happen", args: args{ strings.NewReader( "sl local_address rem_address st tx_queue rx_queue \n" +