From cc6f2166e3e115461f9fc04603fd69bb6673dd71 Mon Sep 17 00:00:00 2001 From: Yashwant Date: Wed, 3 Jul 2024 13:07:20 +0530 Subject: [PATCH 1/3] adding cpu and memory metrics --- go.mod | 2 + go.sum | 4 + instrumentation/opentelemetry/init.go | 2 + .../internal/metrics/system_metrics.go | 120 ++++++++++++++++++ 4 files changed, 128 insertions(+) create mode 100644 instrumentation/opentelemetry/internal/metrics/system_metrics.go diff --git a/go.mod b/go.mod index 77294cca..5a8d1e5c 100644 --- a/go.mod +++ b/go.mod @@ -59,6 +59,8 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/tklauser/go-sysconf v0.3.14 // indirect + github.com/tklauser/numcpus v0.8.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.11 // indirect go.opentelemetry.io/proto/otlp v1.2.0 // indirect diff --git a/go.sum b/go.sum index 73eba4a3..eddc3155 100644 --- a/go.sum +++ b/go.sum @@ -627,6 +627,10 @@ github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU= +github.com/tklauser/go-sysconf v0.3.14/go.mod h1:1ym4lWMLUOhuBOPGtRcJm7tEGX4SCYNEEEtghGG/8uY= +github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYgY= +github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= diff --git a/instrumentation/opentelemetry/init.go b/instrumentation/opentelemetry/init.go index 99959896..b88a208d 100644 --- a/instrumentation/opentelemetry/init.go +++ b/instrumentation/opentelemetry/init.go @@ -16,6 +16,7 @@ import ( config "github.com/hypertrace/agent-config/gen/go/v1" modbsp "github.com/hypertrace/goagent/instrumentation/opentelemetry/batchspanprocessor" "github.com/hypertrace/goagent/instrumentation/opentelemetry/identifier" + "github.com/hypertrace/goagent/instrumentation/opentelemetry/internal/metrics" "github.com/hypertrace/goagent/sdk" sdkconfig "github.com/hypertrace/goagent/sdk/config" "github.com/hypertrace/goagent/version" @@ -434,6 +435,7 @@ func initializeMetrics(cfg *config.AgentConfig, versionInfoAttrs []attribute.Key meterProvider := metric.NewMeterProvider(metric.WithReader(periodicReader), metric.WithResource(metricResources)) otel.SetMeterProvider(meterProvider) + metrics.InitialiseMetrics() return func() { err = meterProvider.Shutdown(context.Background()) if err != nil { diff --git a/instrumentation/opentelemetry/internal/metrics/system_metrics.go b/instrumentation/opentelemetry/internal/metrics/system_metrics.go new file mode 100644 index 00000000..ca2d663c --- /dev/null +++ b/instrumentation/opentelemetry/internal/metrics/system_metrics.go @@ -0,0 +1,120 @@ +package metrics + +import ( + "context" + "errors" + "fmt" + "log" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/tklauser/go-sysconf" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" +) + +const meterName = "hypertrace.goagent.metrics" + +type systemMetrics struct { + memory float64 + cpuSecondsTotal float64 +} + +type processStats struct { + utime float64 + stime float64 + cutime float64 + cstime float64 + rss float64 +} + +const procStatArrayLength = 52 + +var ( + clkTck = getClockTicks() + pageSize = float64(os.Getpagesize()) +) + +func InitialiseMetrics() { + meterProvider := otel.GetMeterProvider() + meter := meterProvider.Meter(meterName) + err := setUpMetricRecorder(meter) + if err != nil { + fmt.Println("error initialising metrics, failed to setup metric recorder") + } +} + +func processStatsFromPid(pid int) (*systemMetrics, error) { + sysInfo := &systemMetrics{} + procFilepath := filepath.Join("/proc", strconv.Itoa(pid), "stat") + var err error + if procStatFileBytes, err := os.ReadFile(filepath.Clean(procFilepath)); err == nil { + if stat, err := parseProcStatFile(procStatFileBytes, procFilepath); err == nil { + sysInfo.memory = stat.rss * pageSize + sysInfo.cpuSecondsTotal = (stat.stime + stat.utime + stat.cstime + stat.cutime) / clkTck + return sysInfo, nil + } + return nil, err + } + return nil, err +} + +// ref: /proc/pid/stat section of https://man7.org/linux/man-pages/man5/proc.5.html +func parseProcStatFile(bytesArr []byte, procFilepath string) (*processStats, error) { + infos := strings.Split(string(bytesArr), " ") + if len(infos) != procStatArrayLength { + return nil, errors.New(fmt.Sprintf("%s file could not be parsed", procFilepath)) + } + return &processStats{ + utime: parseFloat(infos[13]), + stime: parseFloat(infos[14]), + cutime: parseFloat(infos[15]), + cstime: parseFloat(infos[16]), + rss: parseFloat(infos[23]), + }, nil +} +func parseFloat(val string) float64 { + floatVal, _ := strconv.ParseFloat(val, 64) + return floatVal +} + +// sysconf for go. claims to work without cgo or external binaries +// https://pkg.go.dev/github.com/tklauser/go-sysconf@v0.3.14#section-readme +func getClockTicks() float64 { + clktck, err := sysconf.Sysconf(sysconf.SC_CLK_TCK) + if err != nil { + return float64(100) + } + return float64(clktck) +} + +func setUpMetricRecorder(meter metric.Meter) error { + if meter == nil { + return fmt.Errorf("error while setting up metric recorder: meter is nil") + } + cpuSeconds, err := meter.Float64ObservableCounter("cpu.seconds.total", metric.WithDescription("Metric to monitor total CPU seconds")) + if err != nil { + return fmt.Errorf("error while setting up cpu seconds metric counter: %v", err) + } + memory, err := meter.Float64ObservableGauge("memory", metric.WithDescription("Metric to monitor memory usage")) + if err != nil { + return fmt.Errorf("error while setting up memory metric counter: %v", err) + } + // Register the callback function for both cpu_seconds and memory observable gauges + _, err = meter.RegisterCallback( + func(ctx context.Context, result metric.Observer) error { + systemMetrics, err := processStatsFromPid(os.Getpid()) + result.ObserveFloat64(cpuSeconds, systemMetrics.cpuSecondsTotal) + result.ObserveFloat64(memory, systemMetrics.memory) + return err + }, + cpuSeconds, memory, + ) + if err != nil { + log.Fatalf("failed to register callback: %v", err) + return err + } + return nil +} From 9bdbfcc1ba92f01f0521f8aa812884bcf10b4f4c Mon Sep 17 00:00:00 2001 From: Yashwant Date: Thu, 11 Jul 2024 13:47:17 +0530 Subject: [PATCH 2/3] lint fix --- instrumentation/opentelemetry/init.go | 2 +- .../opentelemetry/internal/metrics/system_metrics.go | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/instrumentation/opentelemetry/init.go b/instrumentation/opentelemetry/init.go index b88a208d..88365c08 100644 --- a/instrumentation/opentelemetry/init.go +++ b/instrumentation/opentelemetry/init.go @@ -435,7 +435,7 @@ func initializeMetrics(cfg *config.AgentConfig, versionInfoAttrs []attribute.Key meterProvider := metric.NewMeterProvider(metric.WithReader(periodicReader), metric.WithResource(metricResources)) otel.SetMeterProvider(meterProvider) - metrics.InitialiseMetrics() + metrics.InitializeSystemMetrics() return func() { err = meterProvider.Shutdown(context.Background()) if err != nil { diff --git a/instrumentation/opentelemetry/internal/metrics/system_metrics.go b/instrumentation/opentelemetry/internal/metrics/system_metrics.go index ca2d663c..220f73fb 100644 --- a/instrumentation/opentelemetry/internal/metrics/system_metrics.go +++ b/instrumentation/opentelemetry/internal/metrics/system_metrics.go @@ -2,7 +2,6 @@ package metrics import ( "context" - "errors" "fmt" "log" "os" @@ -37,7 +36,7 @@ var ( pageSize = float64(os.Getpagesize()) ) -func InitialiseMetrics() { +func InitializeSystemMetrics() { meterProvider := otel.GetMeterProvider() meter := meterProvider.Meter(meterName) err := setUpMetricRecorder(meter) @@ -65,7 +64,7 @@ func processStatsFromPid(pid int) (*systemMetrics, error) { func parseProcStatFile(bytesArr []byte, procFilepath string) (*processStats, error) { infos := strings.Split(string(bytesArr), " ") if len(infos) != procStatArrayLength { - return nil, errors.New(fmt.Sprintf("%s file could not be parsed", procFilepath)) + return nil, fmt.Errorf("%s file could not be parsed", procFilepath) } return &processStats{ utime: parseFloat(infos[13]), From beb9f7fba5fb5d5eda2b5a0772caab841f627d16 Mon Sep 17 00:00:00 2001 From: Yashwant Date: Thu, 11 Jul 2024 20:30:50 +0530 Subject: [PATCH 3/3] review comments --- .../opentelemetry/internal/metrics/system_metrics.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/instrumentation/opentelemetry/internal/metrics/system_metrics.go b/instrumentation/opentelemetry/internal/metrics/system_metrics.go index 220f73fb..6557f569 100644 --- a/instrumentation/opentelemetry/internal/metrics/system_metrics.go +++ b/instrumentation/opentelemetry/internal/metrics/system_metrics.go @@ -14,7 +14,7 @@ import ( "go.opentelemetry.io/otel/metric" ) -const meterName = "hypertrace.goagent.metrics" +const meterName = "goagent.hypertrace.org/metrics" type systemMetrics struct { memory float64 @@ -41,7 +41,7 @@ func InitializeSystemMetrics() { meter := meterProvider.Meter(meterName) err := setUpMetricRecorder(meter) if err != nil { - fmt.Println("error initialising metrics, failed to setup metric recorder") + log.Printf("error initialising metrics, failed to setup metric recorder: %v\n", err) } } @@ -74,6 +74,7 @@ func parseProcStatFile(bytesArr []byte, procFilepath string) (*processStats, err rss: parseFloat(infos[23]), }, nil } + func parseFloat(val string) float64 { floatVal, _ := strconv.ParseFloat(val, 64) return floatVal @@ -93,11 +94,11 @@ func setUpMetricRecorder(meter metric.Meter) error { if meter == nil { return fmt.Errorf("error while setting up metric recorder: meter is nil") } - cpuSeconds, err := meter.Float64ObservableCounter("cpu.seconds.total", metric.WithDescription("Metric to monitor total CPU seconds")) + cpuSeconds, err := meter.Float64ObservableCounter("hypertrace.agent.cpu.seconds.total", metric.WithDescription("Metric to monitor total CPU seconds")) if err != nil { return fmt.Errorf("error while setting up cpu seconds metric counter: %v", err) } - memory, err := meter.Float64ObservableGauge("memory", metric.WithDescription("Metric to monitor memory usage")) + memory, err := meter.Float64ObservableGauge("hypertrace.agent.memory", metric.WithDescription("Metric to monitor memory usage")) if err != nil { return fmt.Errorf("error while setting up memory metric counter: %v", err) }