From 3b9513fd2a52f59d856e0c4eefb43ac4f39c2e48 Mon Sep 17 00:00:00 2001 From: Mark Wolfe Date: Thu, 3 Apr 2025 16:43:57 +1100 Subject: [PATCH 1/4] Add otel metrics export to the agent --- go.mod | 4 +- go.sum | 6 +- internal/job/tracing.go | 32 +++++++ tracetools/metrics.go | 183 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 222 insertions(+), 3 deletions(-) create mode 100644 tracetools/metrics.go diff --git a/go.mod b/go.mod index 4419df933e..8abada302a 100644 --- a/go.mod +++ b/go.mod @@ -47,7 +47,10 @@ require ( go.opentelemetry.io/otel v1.35.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0 + go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.35.0 + go.opentelemetry.io/otel/metric v1.35.0 go.opentelemetry.io/otel/sdk v1.35.0 + go.opentelemetry.io/otel/sdk/metric v1.35.0 go.opentelemetry.io/otel/trace v1.35.0 golang.org/x/crypto v0.37.0 golang.org/x/net v0.38.0 @@ -155,7 +158,6 @@ require ( go.opentelemetry.io/collector/pdata/pprofile v0.104.0 // indirect go.opentelemetry.io/collector/semconv v0.104.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 // indirect - go.opentelemetry.io/otel/metric v1.35.0 // indirect go.opentelemetry.io/proto/otlp v1.5.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect diff --git a/go.sum b/go.sum index 034f57f701..25810fc86d 100644 --- a/go.sum +++ b/go.sum @@ -397,12 +397,14 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0 h1:m639+ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0/go.mod h1:LjReUci/F4BUyv+y4dwnq3h/26iNOeC3wAIqgvTIZVo= go.opentelemetry.io/otel/exporters/prometheus v0.49.0 h1:Er5I1g/YhfYv9Affk9nJLfH/+qCCVVg1f2R9AbJfqDQ= go.opentelemetry.io/otel/exporters/prometheus v0.49.0/go.mod h1:KfQ1wpjf3zsHjzP149P4LyAwWRupc6c7t1ZJ9eXpKQM= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.35.0 h1:PB3Zrjs1sG1GBX51SXyTSoOTqcDglmsk7nT6tkKPb/k= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.35.0/go.mod h1:U2R3XyVPzn0WX7wOIypPuptulsMcPDPs/oiSVOMVnHY= go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M= go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE= go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY= go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg= -go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk= -go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w= +go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o= +go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w= go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs= go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc= go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4= diff --git a/internal/job/tracing.go b/internal/job/tracing.go index 614cbdaa97..7ed6476352 100644 --- a/internal/job/tracing.go +++ b/internal/job/tracing.go @@ -20,7 +20,10 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/exporters/otlp/otlptrace" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/exporters/stdout/stdoutmetric" "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/sdk/metric" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" sdktrace "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.4.0" @@ -147,8 +150,33 @@ func (e *Executor) startTracingOpenTelemetry(ctx context.Context) (tracetools.Sp attributes = append(attributes, extras...) resources := resource.NewWithAttributes(semconv.SchemaURL, attributes...) + + consoleExporter, err := stdoutmetric.New() + if err != nil { + e.shell.Errorf("Error creating prom metric exporter %s. Disabling tracing.", err) + return &tracetools.NoopSpan{}, ctx, noopStopper + } + + meterProvider := sdkmetric.NewMeterProvider( + sdkmetric.WithReader(metric.NewPeriodicReader(consoleExporter)), + sdkmetric.WithResource(resources), + ) + otel.SetMeterProvider(meterProvider) + + batchProcessor := sdktrace.NewBatchSpanProcessor( + // You would configure an actual exporter here if needed + exporter, + ) + + spanMetricsProcessor, err := tracetools.NewSpanMetricsProcessor(meterProvider, batchProcessor) + if err != nil { + e.shell.Errorf("Error creating OTLP metric exporter %s. Disabling tracing.", err) + return &tracetools.NoopSpan{}, ctx, noopStopper + } + tracerProvider := sdktrace.NewTracerProvider( sdktrace.WithBatcher(exporter), + sdktrace.WithSpanProcessor(spanMetricsProcessor), sdktrace.WithResource(resources), ) @@ -176,8 +204,12 @@ func (e *Executor) startTracingOpenTelemetry(ctx context.Context) (tracetools.Sp stop := func() { ctx := context.Background() + _ = meterProvider.ForceFlush(ctx) + _ = meterProvider.Shutdown(ctx) _ = tracerProvider.ForceFlush(ctx) _ = tracerProvider.Shutdown(ctx) + _ = spanMetricsProcessor.ForceFlush(ctx) + _ = spanMetricsProcessor.Shutdown(ctx) } return tracetools.NewOpenTelemetrySpan(span), ctx, stop diff --git a/tracetools/metrics.go b/tracetools/metrics.go new file mode 100644 index 0000000000..b7762e00b5 --- /dev/null +++ b/tracetools/metrics.go @@ -0,0 +1,183 @@ +package tracetools + +import ( + "context" + "fmt" + "sync" + "time" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + // semconv "go.opentelemetry.io/otel/semconv/v1.17.0" +) + +// SpanMetricsProcessor implements a trace processor that generates metrics from spans +type SpanMetricsProcessor struct { + meter metric.Meter + histogramMetric metric.Float64Histogram + counterMetric metric.Int64Counter + errorCounter metric.Int64Counter + mutex sync.Mutex + nextProcessor sdktrace.SpanProcessor +} + +// NewSpanMetricsProcessor creates a new SpanMetricsProcessor +func NewSpanMetricsProcessor(mp metric.MeterProvider, nextProcessor sdktrace.SpanProcessor) (*SpanMetricsProcessor, error) { + meter := mp.Meter("span-metrics") + + // Create a histogram for span durations + histogram, err := meter.Float64Histogram( + "span.duration", + metric.WithDescription("The duration of spans"), + metric.WithUnit("ms"), + ) + if err != nil { + return nil, fmt.Errorf("failed to create duration histogram: %w", err) + } + + // Create a counter for span counts + counter, err := meter.Int64Counter( + "span.count", + metric.WithDescription("The number of spans processed"), + metric.WithUnit("1"), + ) + if err != nil { + return nil, fmt.Errorf("failed to create span counter: %w", err) + } + + // Create a counter for span errors + errorCounter, err := meter.Int64Counter( + "span.errors", + metric.WithDescription("The number of errored spans"), + metric.WithUnit("1"), + ) + if err != nil { + return nil, fmt.Errorf("failed to create error counter: %w", err) + } + + return &SpanMetricsProcessor{ + meter: meter, + histogramMetric: histogram, + counterMetric: counter, + errorCounter: errorCounter, + nextProcessor: nextProcessor, + }, nil +} + +// OnStart implements the SpanProcessor interface +func (smp *SpanMetricsProcessor) OnStart(parent context.Context, s sdktrace.ReadWriteSpan) { + if smp.nextProcessor != nil { + smp.nextProcessor.OnStart(parent, s) + } +} + +// OnEnd implements the SpanProcessor interface +func (smp *SpanMetricsProcessor) OnEnd(s sdktrace.ReadOnlySpan) { + smp.mutex.Lock() + defer smp.mutex.Unlock() + + // Extract relevant attributes from the span + attrs := []attribute.KeyValue{ + // semconv.ServiceNameKey.String(s.Resource().Attributes()[semconv.ServiceNameKey].AsString()), + attribute.String("span.name", s.Name()), + attribute.String("span.kind", s.SpanKind().String()), + } + + // Record duration in milliseconds + durationMs := float64(s.EndTime().Sub(s.StartTime())) / float64(time.Millisecond) + smp.histogramMetric.Record(context.Background(), durationMs, metric.WithAttributes(attrs...)) + + // Record span count + smp.counterMetric.Add(context.Background(), 1, metric.WithAttributes(attrs...)) + + // Record error count if span has error status + if s.Status().Code == 2 { // Error status + smp.errorCounter.Add(context.Background(), 1, metric.WithAttributes(attrs...)) + } + + // Pass to next processor if there is one + if smp.nextProcessor != nil { + smp.nextProcessor.OnEnd(s) + } +} + +// Shutdown implements the SpanProcessor interface +func (smp *SpanMetricsProcessor) Shutdown(ctx context.Context) error { + if smp.nextProcessor != nil { + return smp.nextProcessor.Shutdown(ctx) + } + return nil +} + +// ForceFlush implements the SpanProcessor interface +func (smp *SpanMetricsProcessor) ForceFlush(ctx context.Context) error { + if smp.nextProcessor != nil { + return smp.nextProcessor.ForceFlush(ctx) + } + return nil +} + +// func main() { +// // Initialize resource describing the service +// res, err := resource.New(context.Background(), +// resource.WithAttributes( +// semconv.ServiceNameKey.String("my-service"), +// semconv.ServiceVersionKey.String("1.0.0"), +// ), +// ) +// if err != nil { +// log.Fatalf("Failed to create resource: %v", err) +// } + +// // Set up the prometheus exporter +// promExporter, err := prometheus.New() +// if err != nil { +// log.Fatalf("Failed to create Prometheus exporter: %v", err) +// } + +// // Create a metrics provider +// meterProvider := sdkmetric.NewMeterProvider( +// sdkmetric.WithReader(promExporter), +// sdkmetric.WithResource(res), +// ) +// otel.SetMeterProvider(meterProvider) + +// // Create a batch span processor for normal span processing +// batchProcessor := sdktrace.NewBatchSpanProcessor( +// // You would configure an actual exporter here if needed +// sdktrace.NewNoopSpanExporter(), +// ) + +// // Create our custom SpanMetrics processor +// spanMetricsProcessor, err := NewSpanMetricsProcessor(meterProvider, batchProcessor) +// if err != nil { +// log.Fatalf("Failed to create SpanMetrics processor: %v", err) +// } + +// // Create a tracer provider with our custom processor +// tracerProvider := sdktrace.NewTracerProvider( +// sdktrace.WithSampler(sdktrace.AlwaysSample()), +// sdktrace.WithSpanProcessor(spanMetricsProcessor), +// sdktrace.WithResource(res), +// ) +// otel.SetTracerProvider(tracerProvider) + +// // Expose Prometheus metrics endpoint +// http.Handle("/metrics", promhttp.Handler()) +// go func() { +// log.Println("Starting metrics server at :8889") +// if err := http.ListenAndServe(":8889", nil); err != nil { +// log.Fatalf("Failed to start metrics server: %v", err) +// } +// }() + +// // Create a tracer +// tracer := tracerProvider.Tracer("my-service-tracer") + +// // Your application logic here +// for { +// runSample(tracer) +// time.Sleep(1 * time.Second) +// } +// } From 49fddb2da3f3ab74ce931deb0e1c1dc5109e8da5 Mon Sep 17 00:00:00 2001 From: Mark Wolfe Date: Fri, 4 Apr 2025 09:29:47 +1100 Subject: [PATCH 2/4] Update the code to emit metrics in prometheus text format --- go.mod | 8 +++++++- go.sum | 18 ++++++++++-------- internal/job/tracing.go | 31 +++++++++++++++++++++++++------ 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/go.mod b/go.mod index 8abada302a..7226462966 100644 --- a/go.mod +++ b/go.mod @@ -36,6 +36,8 @@ require ( github.com/oleiade/reflections v1.1.0 github.com/opentracing/opentracing-go v1.2.0 github.com/pborman/uuid v1.2.1 + github.com/prometheus/client_golang v1.20.4 + github.com/prometheus/common v0.63.0 github.com/puzpuzpuz/xsync/v2 v2.5.1 github.com/qri-io/jsonschema v0.2.1 github.com/stretchr/testify v1.10.0 @@ -47,7 +49,7 @@ require ( go.opentelemetry.io/otel v1.35.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0 - go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.35.0 + go.opentelemetry.io/otel/exporters/prometheus v0.49.0 go.opentelemetry.io/otel/metric v1.35.0 go.opentelemetry.io/otel/sdk v1.35.0 go.opentelemetry.io/otel/sdk/metric v1.35.0 @@ -97,6 +99,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.33.18 // indirect github.com/aws/smithy-go v1.22.2 // indirect + github.com/beorn7/perks v1.0.1 // indirect github.com/bmatcuk/doublestar/v4 v4.6.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect @@ -133,12 +136,15 @@ require ( github.com/mitchellh/mapstructure v1.5.1-0.20231216201459-8508981c8b6c // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/outcaste-io/ristretto v0.2.3 // indirect github.com/philhofer/fwd v1.1.3-0.20240612014219-fbbf4953d986 // indirect github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/power-devops/perfstat v0.0.0-20220216144756-c35f1ee13d7c // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/procfs v0.15.1 // indirect github.com/qri-io/jsonpointer v0.1.1 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/ryanuber/go-glob v1.0.0 // indirect diff --git a/go.sum b/go.sum index 25810fc86d..5d321fe25b 100644 --- a/go.sum +++ b/go.sum @@ -231,6 +231,8 @@ github.com/keybase/go-keychain v0.0.0-20231219164618-57a3676c3af6 h1:IsMZxCuZqKu github.com/keybase/go-keychain v0.0.0-20231219164618-57a3676c3af6/go.mod h1:3VeWNIJaW+O5xpRQbPp0Ybqu1vJd/pm7s2F473HRrkw= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -268,6 +270,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/oleiade/reflections v1.1.0 h1:D+I/UsXQB4esMathlt0kkZRJZdUDmhv5zGi/HOwYTWo= github.com/oleiade/reflections v1.1.0/go.mod h1:mCxx0QseeVCHs5Um5HhJeCKVC7AwS8kO67tky4rdisA= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= @@ -289,14 +293,14 @@ github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndr github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/power-devops/perfstat v0.0.0-20220216144756-c35f1ee13d7c h1:NRoLoZvkBTKvR5gQLgA3e0hqjkY9u1wm+iOL45VN/qI= github.com/power-devops/perfstat v0.0.0-20220216144756-c35f1ee13d7c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= -github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= -github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= +github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.54.0 h1:ZlZy0BgJhTwVZUn7dLOkwCZHUkrAqd3WYtcFCWnM1D8= -github.com/prometheus/common v0.54.0/go.mod h1:/TQgMJP5CuVYveyT7n/0Ix8yLNNXy9yRSkhnLTHPDIQ= -github.com/prometheus/procfs v0.15.0 h1:A82kmvXJq2jTu5YUhSGNlYoxh85zLnKgPz4bMZgI5Ek= -github.com/prometheus/procfs v0.15.0/go.mod h1:Y0RJ/Y5g5wJpkTisOtqwDSo4HwhGmLB4VQSw2sQJLHk= +github.com/prometheus/common v0.63.0 h1:YR/EIY1o3mEFP/kZCD7iDMnLPlGyuU2Gb3HIcXnA98k= +github.com/prometheus/common v0.63.0/go.mod h1:VVFF/fBIoToEnWRVkYoXEkq3R3paCoxG9PXP74SnV18= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/puzpuzpuz/xsync/v2 v2.5.1 h1:mVGYAvzDSu52+zaGyNjC+24Xw2bQi3kTr4QJ6N9pIIU= github.com/puzpuzpuz/xsync/v2 v2.5.1/go.mod h1:gD2H2krq/w52MfPLE+Uy64TzJDVY7lP2znR9qmR35kU= github.com/qri-io/jsonpointer v0.1.1 h1:prVZBZLL6TW5vsSB9fFHFAMBLI4b0ri5vribQlTJiBA= @@ -397,8 +401,6 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0 h1:m639+ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0/go.mod h1:LjReUci/F4BUyv+y4dwnq3h/26iNOeC3wAIqgvTIZVo= go.opentelemetry.io/otel/exporters/prometheus v0.49.0 h1:Er5I1g/YhfYv9Affk9nJLfH/+qCCVVg1f2R9AbJfqDQ= go.opentelemetry.io/otel/exporters/prometheus v0.49.0/go.mod h1:KfQ1wpjf3zsHjzP149P4LyAwWRupc6c7t1ZJ9eXpKQM= -go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.35.0 h1:PB3Zrjs1sG1GBX51SXyTSoOTqcDglmsk7nT6tkKPb/k= -go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.35.0/go.mod h1:U2R3XyVPzn0WX7wOIypPuptulsMcPDPs/oiSVOMVnHY= go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M= go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE= go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY= diff --git a/internal/job/tracing.go b/internal/job/tracing.go index 7ed6476352..a726eaf93e 100644 --- a/internal/job/tracing.go +++ b/internal/job/tracing.go @@ -12,6 +12,8 @@ import ( "github.com/buildkite/agent/v3/tracetools" "github.com/buildkite/agent/v3/version" "github.com/opentracing/opentracing-go" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/expfmt" "go.opentelemetry.io/contrib/propagators/aws/xray" "go.opentelemetry.io/contrib/propagators/b3" "go.opentelemetry.io/contrib/propagators/jaeger" @@ -20,9 +22,8 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/exporters/otlp/otlptrace" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" - "go.opentelemetry.io/otel/exporters/stdout/stdoutmetric" + prometheusexp "go.opentelemetry.io/otel/exporters/prometheus" "go.opentelemetry.io/otel/propagation" - "go.opentelemetry.io/otel/sdk/metric" sdkmetric "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" sdktrace "go.opentelemetry.io/otel/sdk/trace" @@ -151,14 +152,14 @@ func (e *Executor) startTracingOpenTelemetry(ctx context.Context) (tracetools.Sp resources := resource.NewWithAttributes(semconv.SchemaURL, attributes...) - consoleExporter, err := stdoutmetric.New() + promExporter, err := prometheusexp.New() if err != nil { e.shell.Errorf("Error creating prom metric exporter %s. Disabling tracing.", err) return &tracetools.NoopSpan{}, ctx, noopStopper } meterProvider := sdkmetric.NewMeterProvider( - sdkmetric.WithReader(metric.NewPeriodicReader(consoleExporter)), + sdkmetric.WithReader(promExporter), sdkmetric.WithResource(resources), ) otel.SetMeterProvider(meterProvider) @@ -203,13 +204,14 @@ func (e *Executor) startTracingOpenTelemetry(ctx context.Context) (tracetools.Sp ) stop := func() { + writeGlobalMetricsToStdout() ctx := context.Background() + _ = spanMetricsProcessor.ForceFlush(ctx) + _ = spanMetricsProcessor.Shutdown(ctx) _ = meterProvider.ForceFlush(ctx) _ = meterProvider.Shutdown(ctx) _ = tracerProvider.ForceFlush(ctx) _ = tracerProvider.Shutdown(ctx) - _ = spanMetricsProcessor.ForceFlush(ctx) - _ = spanMetricsProcessor.Shutdown(ctx) } return tracetools.NewOpenTelemetrySpan(span), ctx, stop @@ -328,3 +330,20 @@ func (e *Executor) implementationSpecificSpanName(otelName, ddName string) strin return otelName } } + +func writeGlobalMetricsToStdout() { + // Gather all metrics from the global registry + metricFamilies, err := prometheus.DefaultGatherer.Gather() + if err != nil { + fmt.Printf("Error gathering metrics: %v\n", err) + return + } + + // Create an encoder for the text format + encoder := expfmt.NewEncoder(os.Stdout, expfmt.FmtText) + + // Encode each metric family + for _, mf := range metricFamilies { + encoder.Encode(mf) + } +} From 3a3a355f0b07a8587c318a7633a87d05ca937042 Mon Sep 17 00:00:00 2001 From: Mark Wolfe Date: Mon, 7 Apr 2025 09:32:41 +1000 Subject: [PATCH 3/4] chore: print out vars which could be used for a file path --- internal/job/tracing.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/internal/job/tracing.go b/internal/job/tracing.go index a726eaf93e..2f85ac0d46 100644 --- a/internal/job/tracing.go +++ b/internal/job/tracing.go @@ -204,7 +204,7 @@ func (e *Executor) startTracingOpenTelemetry(ctx context.Context) (tracetools.Sp ) stop := func() { - writeGlobalMetricsToStdout() + e.writeGlobalMetricsToStdout() ctx := context.Background() _ = spanMetricsProcessor.ForceFlush(ctx) _ = spanMetricsProcessor.Shutdown(ctx) @@ -331,7 +331,10 @@ func (e *Executor) implementationSpecificSpanName(otelName, ddName string) strin } } -func writeGlobalMetricsToStdout() { +func (e *Executor) writeGlobalMetricsToStdout() { + + fmt.Printf("Writing global metrics to %s %s %s\n", e.BuildPath, e.AgentName, e.JobID) + // Gather all metrics from the global registry metricFamilies, err := prometheus.DefaultGatherer.Gather() if err != nil { From f9ec79c2914c8c1544be5c4b5261c2a7e1368c24 Mon Sep 17 00:00:00 2001 From: Mark Wolfe Date: Tue, 15 Apr 2025 13:22:34 +1000 Subject: [PATCH 4/4] chore: tidy up commented code --- tracetools/metrics.go | 66 ------------------------------------------- 1 file changed, 66 deletions(-) diff --git a/tracetools/metrics.go b/tracetools/metrics.go index b7762e00b5..4c231da966 100644 --- a/tracetools/metrics.go +++ b/tracetools/metrics.go @@ -9,7 +9,6 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" sdktrace "go.opentelemetry.io/otel/sdk/trace" - // semconv "go.opentelemetry.io/otel/semconv/v1.17.0" ) // SpanMetricsProcessor implements a trace processor that generates metrics from spans @@ -79,7 +78,6 @@ func (smp *SpanMetricsProcessor) OnEnd(s sdktrace.ReadOnlySpan) { // Extract relevant attributes from the span attrs := []attribute.KeyValue{ - // semconv.ServiceNameKey.String(s.Resource().Attributes()[semconv.ServiceNameKey].AsString()), attribute.String("span.name", s.Name()), attribute.String("span.kind", s.SpanKind().String()), } @@ -117,67 +115,3 @@ func (smp *SpanMetricsProcessor) ForceFlush(ctx context.Context) error { } return nil } - -// func main() { -// // Initialize resource describing the service -// res, err := resource.New(context.Background(), -// resource.WithAttributes( -// semconv.ServiceNameKey.String("my-service"), -// semconv.ServiceVersionKey.String("1.0.0"), -// ), -// ) -// if err != nil { -// log.Fatalf("Failed to create resource: %v", err) -// } - -// // Set up the prometheus exporter -// promExporter, err := prometheus.New() -// if err != nil { -// log.Fatalf("Failed to create Prometheus exporter: %v", err) -// } - -// // Create a metrics provider -// meterProvider := sdkmetric.NewMeterProvider( -// sdkmetric.WithReader(promExporter), -// sdkmetric.WithResource(res), -// ) -// otel.SetMeterProvider(meterProvider) - -// // Create a batch span processor for normal span processing -// batchProcessor := sdktrace.NewBatchSpanProcessor( -// // You would configure an actual exporter here if needed -// sdktrace.NewNoopSpanExporter(), -// ) - -// // Create our custom SpanMetrics processor -// spanMetricsProcessor, err := NewSpanMetricsProcessor(meterProvider, batchProcessor) -// if err != nil { -// log.Fatalf("Failed to create SpanMetrics processor: %v", err) -// } - -// // Create a tracer provider with our custom processor -// tracerProvider := sdktrace.NewTracerProvider( -// sdktrace.WithSampler(sdktrace.AlwaysSample()), -// sdktrace.WithSpanProcessor(spanMetricsProcessor), -// sdktrace.WithResource(res), -// ) -// otel.SetTracerProvider(tracerProvider) - -// // Expose Prometheus metrics endpoint -// http.Handle("/metrics", promhttp.Handler()) -// go func() { -// log.Println("Starting metrics server at :8889") -// if err := http.ListenAndServe(":8889", nil); err != nil { -// log.Fatalf("Failed to start metrics server: %v", err) -// } -// }() - -// // Create a tracer -// tracer := tracerProvider.Tracer("my-service-tracer") - -// // Your application logic here -// for { -// runSample(tracer) -// time.Sleep(1 * time.Second) -// } -// }