Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,13 @@ configuration is zero indexed and can also take a stride value
`--collector.perf --collector.perf.cpus=1-10:5`, would collect on CPUs
1, 5, and 10.

The perf collector is also able to collect
[tracepoint](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html)
counts when using the `--collector.perf.tracepoint` flag. Tracepoints can be
found using [`perf list`](http://man7.org/linux/man-pages/man1/perf.1.html) or
from debugfs. An example usage of this would be
`--collector.perf.tracepoint="sched:sched_process_exec"`.


Name | Description | OS
---------|-------------|----
Expand Down
185 changes: 170 additions & 15 deletions collector/perf_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ import (
"strings"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/hodgesds/perf-utils"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/sys/unix"
kingpin "gopkg.in/alecthomas/kingpin.v2"
)

Expand All @@ -30,27 +32,29 @@ const (
)

var (
perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String()
perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String()
perfTracepointFlag = kingpin.Flag("collector.perf.tracepoint", "perf tracepoint that should be collected").Strings()
)

// init registers the perf collector with the collector registry under the
// perfSubsystem name, passing defaultDisabled as its default state and
// NewPerfCollector as its constructor.
func init() {
registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector)
}

// perfCollector is a Collector that uses the perf subsystem to collect
// metrics. It uses perf_event_open and ioctls for profiling. Due to the fact
// that the perf subsystem is highly dependent on kernel configuration and
// settings not all profiler values may be exposed on the target system at any
// given time.
type perfCollector struct {
hwProfilerCPUMap map[*perf.HardwareProfiler]int
swProfilerCPUMap map[*perf.SoftwareProfiler]int
cacheProfilerCPUMap map[*perf.CacheProfiler]int
perfHwProfilers map[int]*perf.HardwareProfiler
perfSwProfilers map[int]*perf.SoftwareProfiler
perfCacheProfilers map[int]*perf.CacheProfiler
desc map[string]*prometheus.Desc
logger log.Logger
// perfTracepointFlagToTracepoints parses the values given to the
// --collector.perf.tracepoint flag into perfTracepoint structs.
//
// Every entry must have the form "subsystem:event" with both parts non-empty.
// The result holds one element per flag value, in the same order. Parsing
// stops at the first malformed entry, which is reported in the returned error.
func perfTracepointFlagToTracepoints(tracepointsFlag []string) ([]*perfTracepoint, error) {
	tracepoints := make([]*perfTracepoint, len(tracepointsFlag))

	for i, tracepoint := range tracepointsFlag {
		split := strings.Split(tracepoint, ":")
		// Exactly one separator is required and neither side may be empty:
		// an entry such as "sched:" or ":exec" cannot name a tracepoint, so
		// it is rejected here rather than failing later with a less
		// helpful message.
		if len(split) != 2 || split[0] == "" || split[1] == "" {
			return nil, fmt.Errorf("Invalid tracepoint config %v", tracepoint)
		}
		tracepoints[i] = &perfTracepoint{
			subsystem: split[0],
			event:     split[1],
		}
	}
	return tracepoints, nil
}

// perfCPUFlagToCPUs returns a set of CPUs for the perf collectors to monitor.
Expand Down Expand Up @@ -98,6 +102,144 @@ func perfCPUFlagToCPUs(cpuFlag string) ([]int, error) {
return cpus, nil
}

// perfTracepoint is a struct for holding tracepoint information. A tracepoint
// is identified by the pair of its subsystem and its event name.
type perfTracepoint struct {
// subsystem is the part of the tracepoint name before the ":" separator.
subsystem string
// event is the part of the tracepoint name after the ":" separator.
event string
}

// label builds the underscore-joined form of the tracepoint name, i.e.
// "<subsystem>_<event>".
func (t *perfTracepoint) label() string {
	const separator = "_"

	name := t.subsystem
	name += separator
	name += t.event
	return name
}

// tracepoint builds the colon-joined form of the tracepoint name, i.e.
// "<subsystem>:<event>".
func (t *perfTracepoint) tracepoint() string {
	const separator = ":"

	name := t.subsystem
	name += separator
	name += t.event
	return name
}

// perfCollector is a Collector that uses the perf subsystem to collect
// metrics. It uses perf_event_open and ioctls for profiling. Due to the fact
// that the perf subsystem is highly dependent on kernel configuration and
// settings, not all profiler values may be exposed on the target system at any
// given time.
type perfCollector struct {
// The three *ProfilerCPUMap fields map a profiler to an int, presumably the
// CPU that profiler is bound to (inferred from the names; confirm against
// NewPerfCollector).
hwProfilerCPUMap map[*perf.HardwareProfiler]int
swProfilerCPUMap map[*perf.SoftwareProfiler]int
cacheProfilerCPUMap map[*perf.CacheProfiler]int
// The three perf*Profilers fields hold the hardware, software and cache
// profilers keyed by an int, presumably the CPU number (confirm against
// NewPerfCollector).
perfHwProfilers map[int]*perf.HardwareProfiler
perfSwProfilers map[int]*perf.SoftwareProfiler
perfCacheProfilers map[int]*perf.CacheProfiler
// desc holds metric descriptors looked up by a string key.
desc map[string]*prometheus.Desc
logger log.Logger
// tracepointCollector is optional; it stays nil unless tracepoints have been
// configured, and is only consulted when non-nil.
tracepointCollector *perfTracepointCollector
}

// perfTracepointCollector collects tracepoint counts through one perf group
// profiler per CPU and exposes them as Prometheus metrics.
type perfTracepointCollector struct {
// descs maps a tracepoint subsystem to a map of event name to the
// *prometheus.Desc used for that tracepoint's metric.
descs map[string]map[string]*prometheus.Desc
// collectionOrder holds the configured tracepoints as "subsystem:event"
// strings, in the order their event attributes were handed to the group
// profiler. Profile values are matched to descs by this position.
collectionOrder []string

// logger receives the error message when reading a profile fails.
logger log.Logger
// profilers maps a CPU number to the group profiler opened for that CPU.
profilers map[int]perf.GroupProfiler
}

// update collects the tracepoint metrics of every CPU that has a profiler and
// sends them to ch. It stops at, and returns, the first error reported by
// updateCPU; nil is returned when every CPU was collected successfully.
func (c *perfTracepointCollector) update(ch chan<- prometheus.Metric) error {
	for cpu := range c.profilers {
		err := c.updateCPU(cpu, ch)
		if err == nil {
			continue
		}
		return err
	}
	return nil
}

// updateCPU reads the current group profile for the given CPU and sends one
// counter metric per configured tracepoint to ch, labelled with the CPU number.
//
// Values returned by the profiler are matched to tracepoints by position using
// collectionOrder. Instead of panicking, an error is returned when no profiler
// is registered for cpu, when the profiler reports more values than there are
// configured tracepoints, or when a tracepoint has no metric descriptor. A
// failure of the profiler itself is logged and returned unchanged.
func (c *perfTracepointCollector) updateCPU(cpu int, ch chan<- prometheus.Metric) error {
	profiler, ok := c.profilers[cpu]
	if !ok || profiler == nil {
		// Calling Profile on a missing map entry would dereference a nil
		// interface and crash the process.
		return fmt.Errorf("no tracepoint profiler configured for CPU %d", cpu)
	}

	p, err := profiler.Profile()
	if err != nil {
		level.Error(c.logger).Log("msg", "Failed to collect tracepoint profile", "err", err)
		return err
	}

	// Check the size up front so that either all metrics of this CPU are
	// emitted or none are; indexing collectionOrder past its end would panic.
	if len(p.Values) > len(c.collectionOrder) {
		return fmt.Errorf(
			"tracepoint profile for CPU %d returned %d values, expected at most %d",
			cpu, len(p.Values), len(c.collectionOrder),
		)
	}

	cpuStr := fmt.Sprintf("%d", cpu)
	for i, value := range p.Values {
		// Get the Desc from the ordered group value.
		descKey := c.collectionOrder[i]
		descKeySlice := strings.Split(descKey, ":")
		if len(descKeySlice) < 2 {
			return fmt.Errorf("malformed tracepoint key %q", descKey)
		}
		// Indexing a missing subsystem yields a nil inner map, and indexing
		// that yields a nil Desc, so a single nil check covers both cases.
		desc := c.descs[descKeySlice[0]][descKeySlice[1]]
		if desc == nil {
			return fmt.Errorf("no metric descriptor for tracepoint %q", descKey)
		}
		ch <- prometheus.MustNewConstMetric(
			desc,
			prometheus.CounterValue,
			float64(value),
			cpuStr,
		)
	}
	return nil
}

// newPerfTracepointCollector returns a configured and started
// perfTracepointCollector.
//
// tracepointsFlag holds the "subsystem:event" strings to collect and cpus the
// CPUs to collect them on. One group profiler covering all tracepoints is
// opened and started per CPU. A tracepoint that is listed more than once is
// only collected once, and a CPU that is listed more than once only gets one
// profiler. If any step fails, every profiler opened so far is closed and the
// error is returned.
func newPerfTracepointCollector(
	logger log.Logger,
	tracepointsFlag []string,
	cpus []int,
) (*perfTracepointCollector, error) {
	tracepoints, err := perfTracepointFlagToTracepoints(tracepointsFlag)
	if err != nil {
		return nil, err
	}

	collectionOrder := make([]string, 0, len(tracepoints))
	descs := map[string]map[string]*prometheus.Desc{}
	eventAttrs := make([]unix.PerfEventAttr, 0, len(tracepoints))

	for _, tracepoint := range tracepoints {
		// A repeated tracepoint would put the same Desc into collectionOrder
		// twice, so the same metric would be emitted twice per CPU on every
		// scrape. Keep only the first occurrence.
		if _, seen := descs[tracepoint.subsystem][tracepoint.event]; seen {
			level.Warn(logger).Log("msg", "Ignoring duplicate tracepoint", "tracepoint", tracepoint.tracepoint())
			continue
		}

		eventAttr, err := perf.TracepointEventAttr(tracepoint.subsystem, tracepoint.event)
		if err != nil {
			return nil, err
		}
		// eventAttrs and collectionOrder must stay index-aligned: profile
		// values are later mapped back to tracepoints by position.
		eventAttrs = append(eventAttrs, *eventAttr)
		collectionOrder = append(collectionOrder, tracepoint.tracepoint())
		if _, ok := descs[tracepoint.subsystem]; !ok {
			descs[tracepoint.subsystem] = map[string]*prometheus.Desc{}
		}
		descs[tracepoint.subsystem][tracepoint.event] = prometheus.NewDesc(
			prometheus.BuildFQName(
				namespace,
				perfSubsystem,
				tracepoint.label(),
			),
			"Perf tracepoint "+tracepoint.tracepoint(),
			[]string{"cpu"},
			nil,
		)
	}

	profilers := make(map[int]perf.GroupProfiler, len(cpus))
	// closeProfilers releases every profiler opened so far. It is only used on
	// the error paths below, where the original error is the one worth
	// reporting, so close errors are deliberately dropped.
	closeProfilers := func() {
		for _, profiler := range profilers {
			_ = profiler.Close()
		}
	}

	for _, cpu := range cpus {
		// Opening a second profiler for the same CPU would overwrite, and
		// thereby leak, the first one.
		if _, ok := profilers[cpu]; ok {
			continue
		}
		// Use -1 to profile all processes on the CPU.
		profiler, err := perf.NewGroupProfiler(-1, cpu, 0, eventAttrs...)
		if err != nil {
			closeProfilers()
			return nil, err
		}
		profilers[cpu] = profiler
	}

	for _, profiler := range profilers {
		if err := profiler.Start(); err != nil {
			closeProfilers()
			return nil, err
		}
	}

	return &perfTracepointCollector{
		descs:           descs,
		collectionOrder: collectionOrder,
		profilers:       profilers,
		logger:          logger,
	}, nil
}

// NewPerfCollector returns a new perf based collector, it creates a profiler
// per CPU.
func NewPerfCollector(logger log.Logger) (Collector, error) {
Expand Down Expand Up @@ -127,6 +269,16 @@ func NewPerfCollector(logger log.Logger) (Collector, error) {
}
}

// First configure any tracepoints.
if *perfTracepointFlag != nil && len(*perfTracepointFlag) > 0 {
tracepointCollector, err := newPerfTracepointCollector(logger, *perfTracepointFlag, cpus)
if err != nil {
return nil, err
}
collector.tracepointCollector = tracepointCollector
}

// Configure all profilers for the specified CPUs.
for _, cpu := range cpus {
// Use -1 to profile all processes on the CPU, see:
// man perf_event_open
Expand Down Expand Up @@ -408,6 +560,9 @@ func (c *perfCollector) Update(ch chan<- prometheus.Metric) error {
if err := c.updateCacheStats(ch); err != nil {
return err
}
if c.tracepointCollector != nil {
return c.tracepointCollector.update(ch)
}

return nil
}
Expand Down
70 changes: 68 additions & 2 deletions collector/perf_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,74 @@ func TestPerfCPUFlagToCPUs(t *testing.T) {
if test.exCpus[i] != cpus[i] {
t.Fatalf(
"expected cpus %v, got %v",
test.exCpus,
cpus,
test.exCpus[i],
cpus[i],
)
}
}
})
}
}

func TestPerfTracepointFlagToTracepoints(t *testing.T) {
tests := []struct {
name string
flag []string
exTracepoints []*perfTracepoint
errStr string
}{
{
name: "valid single tracepoint",
flag: []string{"sched:sched_kthread_stop"},
exTracepoints: []*perfTracepoint{
{
subsystem: "sched",
event: "sched_kthread_stop",
},
},
},
{
name: "valid multiple tracepoints",
flag: []string{"sched:sched_kthread_stop", "sched:sched_process_fork"},
exTracepoints: []*perfTracepoint{
{
subsystem: "sched",
event: "sched_kthread_stop",
},
{
subsystem: "sched",
event: "sched_process_fork",
},
},
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
tracepoints, err := perfTracepointFlagToTracepoints(test.flag)
if test.errStr != "" {
if err != nil {
t.Fatal("expected error to not be nil")
}
if test.errStr != err.Error() {
t.Fatalf(
"expected error %q, got %q",
test.errStr,
err.Error(),
)
}
return
}
if err != nil {
t.Fatal(err)
}
for i := range tracepoints {
if test.exTracepoints[i].event != tracepoints[i].event &&
test.exTracepoints[i].subsystem != tracepoints[i].subsystem {
t.Fatalf(
"expected tracepoint %v, got %v",
test.exTracepoints[i],
tracepoints[i],
)
}
}
Expand Down