From 14c4307770fe7c4f81e7961b09bc955f54791335 Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Sat, 4 Apr 2020 11:33:33 -0400 Subject: [PATCH 1/3] Add tracepoint collector option for perf collector Signed-off-by: Daniel Hodges --- collector/perf_linux.go | 186 ++++++++++++++++++++++++++++++++--- collector/perf_linux_test.go | 70 ++++++++++++- 2 files changed, 239 insertions(+), 17 deletions(-) diff --git a/collector/perf_linux.go b/collector/perf_linux.go index bb8716ad52..aeecd21ff4 100644 --- a/collector/perf_linux.go +++ b/collector/perf_linux.go @@ -20,8 +20,10 @@ import ( "strings" "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" "github.com/hodgesds/perf-utils" "github.com/prometheus/client_golang/prometheus" + "golang.org/x/sys/unix" kingpin "gopkg.in/alecthomas/kingpin.v2" ) @@ -30,27 +32,30 @@ const ( ) var ( - perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String() + perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String() + perfTracepointsFlag = kingpin.Flag("collector.perf.tracepoints", "perf tracepoint that should be collected").Default("").String() ) func init() { registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector) } -// perfCollector is a Collector that uses the perf subsystem to collect -// metrics. It uses perf_event_open an ioctls for profiling. Due to the fact -// that the perf subsystem is highly dependent on kernel configuration and -// settings not all profiler values may be exposed on the target system at any -// given time. 
-type perfCollector struct { - hwProfilerCPUMap map[*perf.HardwareProfiler]int - swProfilerCPUMap map[*perf.SoftwareProfiler]int - cacheProfilerCPUMap map[*perf.CacheProfiler]int - perfHwProfilers map[int]*perf.HardwareProfiler - perfSwProfilers map[int]*perf.SoftwareProfiler - perfCacheProfilers map[int]*perf.CacheProfiler - desc map[string]*prometheus.Desc - logger log.Logger +// perfTracepointFlagToTracepoints returns the set of configured tracepoints. +func perfTracepointFlagToTracepoints(tracepointFlag string) ([]*perfTracepoint, error) { + tracepointStrs := strings.Split(tracepointFlag, ",") + tracepoints := make([]*perfTracepoint, len(tracepointStrs)) + + for i, tracepoint := range tracepointStrs { + split := strings.Split(tracepoint, ":") + if len(split) != 2 { + return nil, fmt.Errorf("Invalid tracepoint config %v", tracepoint) + } + tracepoints[i] = &perfTracepoint{ + subsystem: split[0], + event: split[1], + } + } + return tracepoints, nil } // perfCPUFlagToCPUs returns a set of CPUs for the perf collectors to monitor. @@ -98,6 +103,144 @@ func perfCPUFlagToCPUs(cpuFlag string) ([]int, error) { return cpus, nil } +// perfTracepoint is a struct for holding tracepoint information. +type perfTracepoint struct { + subsystem string + event string +} + +// label returns the tracepoint name in the format of subsystem_tracepoint. +func (t *perfTracepoint) label() string { + return t.subsystem + "_" + t.event +} + +// tracepoint returns the tracepoint name in the format of subsystem:tracepoint. +func (t *perfTracepoint) tracepoint() string { + return t.subsystem + ":" + t.event +} + +// perfCollector is a Collector that uses the perf subsystem to collect +// metrics. It uses perf_event_open and ioctls for profiling. Due to the fact +// that the perf subsystem is highly dependent on kernel configuration and +// settings not all profiler values may be exposed on the target system at any +// given time. 
+type perfCollector struct { + hwProfilerCPUMap map[*perf.HardwareProfiler]int + swProfilerCPUMap map[*perf.SoftwareProfiler]int + cacheProfilerCPUMap map[*perf.CacheProfiler]int + perfHwProfilers map[int]*perf.HardwareProfiler + perfSwProfilers map[int]*perf.SoftwareProfiler + perfCacheProfilers map[int]*perf.CacheProfiler + desc map[string]*prometheus.Desc + logger log.Logger + tracepointCollector *perfTracepointCollector +} + +type perfTracepointCollector struct { + // descs is the mapping of subsystem to tracepoint to *prometheus.Desc. + descs map[string]map[string]*prometheus.Desc + // collectionOrder is the configured collection order of the profiler. + collectionOrder []string + + logger log.Logger + profilers map[int]perf.GroupProfiler +} + +// update is used to collect all tracepoints across all tracepoint profilers. +func (c *perfTracepointCollector) update(ch chan<- prometheus.Metric) error { + for cpu := range c.profilers { + if err := c.updateCPU(cpu, ch); err != nil { + return err + } + } + return nil +} + +// updateCPU is used to update metrics per CPU profiler. +func (c *perfTracepointCollector) updateCPU(cpu int, ch chan<- prometheus.Metric) error { + cpuStr := fmt.Sprintf("%d", cpu) + profiler := c.profilers[cpu] + p, err := profiler.Profile() + if err != nil { + level.Error(c.logger).Log("msg", "Failed to collect tracepoint profile", "err", err) + return err + } + + for i, value := range p.Values { + // Get the Desc from the ordered group value. + descKey := c.collectionOrder[i] + descKeySlice := strings.Split(descKey, ":") + ch <- prometheus.MustNewConstMetric( + c.descs[descKeySlice[0]][descKeySlice[1]], + prometheus.CounterValue, + float64(value), + cpuStr, + ) + } + return nil +} + +// newPerfTracepointCollector returns a configured perfTracepointCollector. 
+func newPerfTracepointCollector( + logger log.Logger, + tracepointFlag string, + cpus []int, +) (*perfTracepointCollector, error) { + tracepoints, err := perfTracepointFlagToTracepoints(tracepointFlag) + if err != nil { + return nil, err + } + + collectionOrder := make([]string, len(tracepoints)) + descs := map[string]map[string]*prometheus.Desc{} + eventAttrs := make([]unix.PerfEventAttr, len(tracepoints)) + + for i, tracepoint := range tracepoints { + eventAttr, err := perf.TracepointEventAttr(tracepoint.subsystem, tracepoint.event) + if err != nil { + return nil, err + } + eventAttrs[i] = *eventAttr + collectionOrder[i] = tracepoint.tracepoint() + if _, ok := descs[tracepoint.subsystem]; !ok { + descs[tracepoint.subsystem] = map[string]*prometheus.Desc{} + } + descs[tracepoint.subsystem][tracepoint.event] = prometheus.NewDesc( + prometheus.BuildFQName( + namespace, + perfSubsystem, + tracepoint.label(), + ), + "Perf tracepoint "+tracepoint.tracepoint(), + []string{"cpu"}, + nil, + ) + } + + profilers := make(map[int]perf.GroupProfiler, len(cpus)) + for _, cpu := range cpus { + profiler, err := perf.NewGroupProfiler(-1, cpu, 0, eventAttrs...) + if err != nil { + return nil, err + } + profilers[cpu] = profiler + } + + c := &perfTracepointCollector{ + descs: descs, + collectionOrder: collectionOrder, + profilers: profilers, + logger: logger, + } + + for _, profiler := range c.profilers { + if err := profiler.Start(); err != nil { + return nil, err + } + } + return c, nil +} + // NewPerfCollector returns a new perf based collector, it creates a profiler // per CPU. func NewPerfCollector(logger log.Logger) (Collector, error) { @@ -127,6 +270,16 @@ func NewPerfCollector(logger log.Logger) (Collector, error) { } } + // First configure any tracepoints. 
+ if *perfTracepointsFlag != "" { + tracepointCollector, err := newPerfTracepointCollector(logger, *perfTracepointsFlag, cpus) + if err != nil { + return nil, err + } + collector.tracepointCollector = tracepointCollector + } + + // Configure all profilers for the specified CPUs. for _, cpu := range cpus { // Use -1 to profile all processes on the CPU, see: // man perf_event_open @@ -408,6 +561,9 @@ func (c *perfCollector) Update(ch chan<- prometheus.Metric) error { if err := c.updateCacheStats(ch); err != nil { return err } + if c.tracepointCollector != nil { + return c.tracepointCollector.update(ch) + } return nil } diff --git a/collector/perf_linux_test.go b/collector/perf_linux_test.go index eecfab9c4c..030b49f1fd 100644 --- a/collector/perf_linux_test.go +++ b/collector/perf_linux_test.go @@ -119,8 +119,74 @@ func TestPerfCPUFlagToCPUs(t *testing.T) { if test.exCpus[i] != cpus[i] { t.Fatalf( "expected cpus %v, got %v", - test.exCpus, - cpus, + test.exCpus[i], + cpus[i], + ) + } + } + }) + } +} + +func TestPerfTracepointFlagToTracepoints(t *testing.T) { + tests := []struct { + name string + flag string + exTracepoints []*perfTracepoint + errStr string + }{ + { + name: "valid single tracepoint", + flag: "sched:sched_kthread_stop", + exTracepoints: []*perfTracepoint{ + { + subsystem: "sched", + event: "sched_kthread_stop", + }, + }, + }, + { + name: "valid multiple tracepoints", + flag: "sched:sched_kthread_stop,sched:sched_process_fork", + exTracepoints: []*perfTracepoint{ + { + subsystem: "sched", + event: "sched_kthread_stop", + }, + { + subsystem: "sched", + event: "sched_process_fork", + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + tracepoints, err := perfTracepointFlagToTracepoints(test.flag) + if test.errStr != "" { + if err != nil { + t.Fatal("expected error to not be nil") + } + if test.errStr != err.Error() { + t.Fatalf( + "expected error %q, got %q", + test.errStr, + err.Error(), + ) + } + return + } + if 
err != nil { + t.Fatal(err) + } + for i := range tracepoints { + if test.exTracepoints[i].event != tracepoints[i].event && + test.exTracepoints[i].subsystem != tracepoints[i].subsystem { + t.Fatalf( + "expected tracepoint %v, got %v", + test.exTracepoints[i], + tracepoints[i], ) } } From 440018541a2b064cbba538921fb48a3b6fbb3343 Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Sat, 4 Apr 2020 12:10:28 -0400 Subject: [PATCH 2/3] Update readme Signed-off-by: Daniel Hodges --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 3a1546e695..9638dc62ae 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,13 @@ configuration is zero indexed and can also take a stride value `--collector.perf --collector.perf.cpus=1-10:5`, would collect on CPUs 1, 5, and 10. +The perf collector is also able to collect +[tracepoint](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html) +counts when using the `--collector.perf.tracepoints` flag. Tracepoints can be +found using [`perf list`](http://man7.org/linux/man-pages/man1/perf.1.html) or +from debugfs. An example usage of this would be +`--collector.perf.tracepoints="sched:sched_process_exec,sched:sched_process_exit"`. + Name | Description | OS ---------|-------------|---- From 0dc5d02526f9bb6cf40d10f6c0ceded27b5fbb45 Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Tue, 14 Apr 2020 22:07:21 -0400 Subject: [PATCH 3/3] update flags Signed-off-by: Daniel Hodges --- README.md | 4 ++-- collector/perf_linux.go | 19 +++++++++---------- collector/perf_linux_test.go | 6 +++--- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 9638dc62ae..6144c79e3d 100644 --- a/README.md +++ b/README.md @@ -98,10 +98,10 @@ configuration is zero indexed and can also take a stride value The perf collector is also able to collect [tracepoint](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html) -counts when using the `--collector.perf.tracepoints` flag. 
Tracepoints can be +counts when using the `--collector.perf.tracepoint` flag. Tracepoints can be found using [`perf list`](http://man7.org/linux/man-pages/man1/perf.1.html) or from debugfs. An example usage of this would be -`--collector.perf.tracepoints="sched:sched_process_exec,sched:sched_process_exit"`. +`--collector.perf.tracepoint="sched:sched_process_exec"`. Name | Description | OS diff --git a/collector/perf_linux.go b/collector/perf_linux.go index aeecd21ff4..1351ede41c 100644 --- a/collector/perf_linux.go +++ b/collector/perf_linux.go @@ -32,8 +32,8 @@ const ( ) var ( - perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String() - perfTracepointsFlag = kingpin.Flag("collector.perf.tracepoints", "perf tracepoint that should be collected").Default("").String() + perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String() + perfTracepointFlag = kingpin.Flag("collector.perf.tracepoint", "perf tracepoint that should be collected").Strings() ) func init() { @@ -41,11 +41,10 @@ func init() { } // perfTracepointFlagToTracepoints returns the set of configured tracepoints. -func perfTracepointFlagToTracepoints(tracepointFlag string) ([]*perfTracepoint, error) { - tracepointStrs := strings.Split(tracepointFlag, ",") - tracepoints := make([]*perfTracepoint, len(tracepointStrs)) +func perfTracepointFlagToTracepoints(tracepointsFlag []string) ([]*perfTracepoint, error) { + tracepoints := make([]*perfTracepoint, len(tracepointsFlag)) - for i, tracepoint := range tracepointStrs { + for i, tracepoint := range tracepointsFlag { split := strings.Split(tracepoint, ":") if len(split) != 2 { return nil, fmt.Errorf("Invalid tracepoint config %v", tracepoint) @@ -183,10 +182,10 @@ func (c *perfTracepointCollector) updateCPU(cpu int, ch chan<- prometheus.Metric // newPerfTracepointCollector returns a configured perfTracepointCollector. 
func newPerfTracepointCollector( logger log.Logger, - tracepointFlag string, + tracepointsFlag []string, cpus []int, ) (*perfTracepointCollector, error) { - tracepoints, err := perfTracepointFlagToTracepoints(tracepointFlag) + tracepoints, err := perfTracepointFlagToTracepoints(tracepointsFlag) if err != nil { return nil, err } @@ -271,8 +270,8 @@ func NewPerfCollector(logger log.Logger) (Collector, error) { } // First configure any tracepoints. - if *perfTracepointsFlag != "" { - tracepointCollector, err := newPerfTracepointCollector(logger, *perfTracepointsFlag, cpus) + if *perfTracepointFlag != nil && len(*perfTracepointFlag) > 0 { + tracepointCollector, err := newPerfTracepointCollector(logger, *perfTracepointFlag, cpus) if err != nil { return nil, err } diff --git a/collector/perf_linux_test.go b/collector/perf_linux_test.go index 030b49f1fd..c420946c0f 100644 --- a/collector/perf_linux_test.go +++ b/collector/perf_linux_test.go @@ -131,13 +131,13 @@ func TestPerfCPUFlagToCPUs(t *testing.T) { func TestPerfTracepointFlagToTracepoints(t *testing.T) { tests := []struct { name string - flag string + flag []string exTracepoints []*perfTracepoint errStr string }{ { name: "valid single tracepoint", - flag: "sched:sched_kthread_stop", + flag: []string{"sched:sched_kthread_stop"}, exTracepoints: []*perfTracepoint{ { subsystem: "sched", @@ -147,7 +147,7 @@ func TestPerfTracepointFlagToTracepoints(t *testing.T) { }, { name: "valid multiple tracepoints", - flag: "sched:sched_kthread_stop,sched:sched_process_fork", + flag: []string{"sched:sched_kthread_stop", "sched:sched_process_fork"}, exTracepoints: []*perfTracepoint{ { subsystem: "sched",