diff --git a/go.mod b/go.mod
index 77294cca..5a8d1e5c 100644
--- a/go.mod
+++ b/go.mod
@@ -59,6 +59,8 @@ require (
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/pelletier/go-toml/v2 v2.0.8 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
+	github.com/tklauser/go-sysconf v0.3.14 // indirect
+	github.com/tklauser/numcpus v0.8.0 // indirect
 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
 	github.com/ugorji/go/codec v1.2.11 // indirect
 	go.opentelemetry.io/proto/otlp v1.2.0 // indirect
diff --git a/go.sum b/go.sum
index 73eba4a3..eddc3155 100644
--- a/go.sum
+++ b/go.sum
@@ -627,6 +627,10 @@ github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o
 github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
 github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU=
+github.com/tklauser/go-sysconf v0.3.14/go.mod h1:1ym4lWMLUOhuBOPGtRcJm7tEGX4SCYNEEEtghGG/8uY=
+github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYgY=
+github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE=
 github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
 github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
 github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
diff --git a/instrumentation/opentelemetry/init.go b/instrumentation/opentelemetry/init.go
index 99959896..88365c08 100644
--- a/instrumentation/opentelemetry/init.go
+++ b/instrumentation/opentelemetry/init.go
@@ -16,6 +16,7 @@ import (
 	config "github.com/hypertrace/agent-config/gen/go/v1"
 	modbsp "github.com/hypertrace/goagent/instrumentation/opentelemetry/batchspanprocessor"
 	"github.com/hypertrace/goagent/instrumentation/opentelemetry/identifier"
+	"github.com/hypertrace/goagent/instrumentation/opentelemetry/internal/metrics"
 	"github.com/hypertrace/goagent/sdk"
 	sdkconfig "github.com/hypertrace/goagent/sdk/config"
 	"github.com/hypertrace/goagent/version"
@@ -434,6 +435,7 @@ func initializeMetrics(cfg *config.AgentConfig, versionInfoAttrs []attribute.Key
 
 	meterProvider := metric.NewMeterProvider(metric.WithReader(periodicReader), metric.WithResource(metricResources))
 	otel.SetMeterProvider(meterProvider)
+	metrics.InitializeSystemMetrics()
 	return func() {
 		err = meterProvider.Shutdown(context.Background())
 		if err != nil {
diff --git a/instrumentation/opentelemetry/internal/metrics/system_metrics.go b/instrumentation/opentelemetry/internal/metrics/system_metrics.go
new file mode 100644
index 00000000..6557f569
--- /dev/null
+++ b/instrumentation/opentelemetry/internal/metrics/system_metrics.go
@@ -0,0 +1,175 @@
+// Package metrics reports process-level resource usage (CPU time and resident
+// memory) of the running agent through OpenTelemetry observable instruments.
+package metrics
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/tklauser/go-sysconf"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/metric"
+)
+
+const meterName = "goagent.hypertrace.org/metrics"
+
+// systemMetrics is a snapshot of resource usage derived from /proc/<pid>/stat.
+type systemMetrics struct {
+	memory          float64 // resident set size, in bytes
+	cpuSecondsTotal float64 // CPU time incl. waited-for children, in seconds
+}
+
+// processStats holds the raw /proc/<pid>/stat values used to build systemMetrics.
+type processStats struct {
+	utime  float64 // user-mode time, in clock ticks
+	stime  float64 // kernel-mode time, in clock ticks
+	cutime float64 // user-mode time of waited-for children, in clock ticks
+	cstime float64 // kernel-mode time of waited-for children, in clock ticks
+	rss    float64 // resident set size, in pages
+}
+
+// Positions of the values read from /proc/<pid>/stat, counted from the first
+// field after the parenthesised command name ("state", field 3 in proc(5), is
+// index 0).
+const (
+	statUtimeIdx  = 11 // field 14
+	statStimeIdx  = 12 // field 15
+	statCutimeIdx = 13 // field 16
+	statCstimeIdx = 14 // field 17
+	statRSSIdx    = 21 // field 24
+)
+
+// defaultClockTicks is the ticks-per-second value assumed when sysconf cannot
+// provide one.
+const defaultClockTicks = 100
+
+var (
+	clkTck   = getClockTicks()
+	pageSize = float64(os.Getpagesize())
+)
+
+// InitializeSystemMetrics registers the CPU and memory instruments on the
+// global meter provider. A setup failure is logged and otherwise ignored.
+func InitializeSystemMetrics() {
+	meter := otel.GetMeterProvider().Meter(meterName)
+	if err := setUpMetricRecorder(meter); err != nil {
+		log.Printf("error initialising metrics, failed to setup metric recorder: %v\n", err)
+	}
+}
+
+// processStatsFromPid reads /proc/<pid>/stat and converts it into resident
+// memory (bytes) and cumulative CPU time (seconds). When the file cannot be
+// read or parsed it returns a nil result together with a non-nil error.
+func processStatsFromPid(pid int) (*systemMetrics, error) {
+	procFilepath := filepath.Join("/proc", strconv.Itoa(pid), "stat")
+	procStatFileBytes, err := os.ReadFile(filepath.Clean(procFilepath))
+	if err != nil {
+		return nil, err
+	}
+	stat, err := parseProcStatFile(procStatFileBytes, procFilepath)
+	if err != nil {
+		return nil, err
+	}
+	return &systemMetrics{
+		memory:          stat.rss * pageSize,
+		cpuSecondsTotal: (stat.stime + stat.utime + stat.cstime + stat.cutime) / clkTck,
+	}, nil
+}
+
+// parseProcStatFile extracts the CPU time and resident set size values from the
+// contents of a /proc/<pid>/stat file. procFilepath is used only in the error
+// message.
+//
+// ref: /proc/pid/stat section of https://man7.org/linux/man-pages/man5/proc.5.html
+func parseProcStatFile(bytesArr []byte, procFilepath string) (*processStats, error) {
+	content := string(bytesArr)
+	// The command name (field 2) is wrapped in parentheses and may itself
+	// contain spaces or parentheses, so only the text after the last ')' is
+	// safe to split on whitespace.
+	commEnd := strings.LastIndex(content, ")")
+	if commEnd < 0 {
+		return nil, fmt.Errorf("%s file could not be parsed", procFilepath)
+	}
+	// The number of trailing fields differs between kernel versions, so only
+	// the presence of the fields read below is required.
+	infos := strings.Fields(content[commEnd+1:])
+	if len(infos) <= statRSSIdx {
+		return nil, fmt.Errorf("%s file could not be parsed", procFilepath)
+	}
+	return &processStats{
+		utime:  parseFloat(infos[statUtimeIdx]),
+		stime:  parseFloat(infos[statStimeIdx]),
+		cutime: parseFloat(infos[statCutimeIdx]),
+		cstime: parseFloat(infos[statCstimeIdx]),
+		rss:    parseFloat(infos[statRSSIdx]),
+	}, nil
+}
+
+// parseFloat converts a numeric stat field to float64. The fields it is used
+// for are documented as decimal integers, so a failure is not expected; it
+// yields 0 rather than aborting the whole collection.
+func parseFloat(val string) float64 {
+	floatVal, err := strconv.ParseFloat(val, 64)
+	if err != nil {
+		return 0
+	}
+	return floatVal
+}
+
+// getClockTicks returns the clock ticks per second (SC_CLK_TCK) used to convert
+// /proc CPU times into seconds. It falls back to defaultClockTicks when sysconf
+// fails or reports a non-positive value, which would otherwise make the
+// division in processStatsFromPid meaningless.
+//
+// sysconf for go. claims to work without cgo or external binaries
+// https://pkg.go.dev/github.com/tklauser/go-sysconf@v0.3.14#section-readme
+func getClockTicks() float64 {
+	clktck, err := sysconf.Sysconf(sysconf.SC_CLK_TCK)
+	if err != nil || clktck <= 0 {
+		return defaultClockTicks
+	}
+	return float64(clktck)
+}
+
+// setUpMetricRecorder creates the CPU seconds counter and the memory gauge on
+// meter and registers one callback that observes both from /proc/<pid>/stat.
+// It returns an error when meter is nil, or when an instrument or the callback
+// cannot be registered.
+func setUpMetricRecorder(meter metric.Meter) error {
+	if meter == nil {
+		return fmt.Errorf("error while setting up metric recorder: meter is nil")
+	}
+	cpuSeconds, err := meter.Float64ObservableCounter("hypertrace.agent.cpu.seconds.total", metric.WithDescription("Metric to monitor total CPU seconds"))
+	if err != nil {
+		return fmt.Errorf("error while setting up cpu seconds metric counter: %w", err)
+	}
+	memory, err := meter.Float64ObservableGauge("hypertrace.agent.memory", metric.WithDescription("Metric to monitor memory usage"))
+	if err != nil {
+		return fmt.Errorf("error while setting up memory metric gauge: %w", err)
+	}
+	// Register one callback for both the cpu seconds counter and the memory gauge.
+	_, err = meter.RegisterCallback(
+		func(ctx context.Context, result metric.Observer) error {
+			stats, err := processStatsFromPid(os.Getpid())
+			if err != nil {
+				// stats is nil here; observe nothing instead of dereferencing it.
+				return err
+			}
+			result.ObserveFloat64(cpuSeconds, stats.cpuSecondsTotal)
+			result.ObserveFloat64(memory, stats.memory)
+			return nil
+		},
+		cpuSeconds, memory,
+	)
+	if err != nil {
+		// Returned instead of log.Fatalf so a metrics failure cannot terminate
+		// the host application; InitializeSystemMetrics logs it.
+		return fmt.Errorf("failed to register callback: %w", err)
+	}
+	return nil
+}