Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ require (
github.com/oleiade/reflections v1.1.0
github.com/opentracing/opentracing-go v1.2.0
github.com/pborman/uuid v1.2.1
github.com/prometheus/client_golang v1.20.4
github.com/prometheus/common v0.63.0
github.com/puzpuzpuz/xsync/v2 v2.5.1
github.com/qri-io/jsonschema v0.2.1
github.com/stretchr/testify v1.10.0
Expand All @@ -47,7 +49,10 @@ require (
go.opentelemetry.io/otel v1.35.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0
go.opentelemetry.io/otel/exporters/prometheus v0.49.0
go.opentelemetry.io/otel/metric v1.35.0
go.opentelemetry.io/otel/sdk v1.35.0
go.opentelemetry.io/otel/sdk/metric v1.35.0
go.opentelemetry.io/otel/trace v1.35.0
golang.org/x/crypto v0.37.0
golang.org/x/net v0.38.0
Expand Down Expand Up @@ -94,6 +99,7 @@ require (
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.33.18 // indirect
github.com/aws/smithy-go v1.22.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bmatcuk/doublestar/v4 v4.6.1 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
Expand Down Expand Up @@ -130,12 +136,15 @@ require (
github.com/mitchellh/mapstructure v1.5.1-0.20231216201459-8508981c8b6c // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/outcaste-io/ristretto v0.2.3 // indirect
github.com/philhofer/fwd v1.1.3-0.20240612014219-fbbf4953d986 // indirect
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/power-devops/perfstat v0.0.0-20220216144756-c35f1ee13d7c // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/qri-io/jsonpointer v0.1.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/ryanuber/go-glob v1.0.0 // indirect
Expand All @@ -155,7 +164,6 @@ require (
go.opentelemetry.io/collector/pdata/pprofile v0.104.0 // indirect
go.opentelemetry.io/collector/semconv v0.104.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 // indirect
go.opentelemetry.io/otel/metric v1.35.0 // indirect
go.opentelemetry.io/proto/otlp v1.5.0 // indirect
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
Expand Down
20 changes: 12 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ github.com/keybase/go-keychain v0.0.0-20231219164618-57a3676c3af6 h1:IsMZxCuZqKu
github.com/keybase/go-keychain v0.0.0-20231219164618-57a3676c3af6/go.mod h1:3VeWNIJaW+O5xpRQbPp0Ybqu1vJd/pm7s2F473HRrkw=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
Expand Down Expand Up @@ -268,6 +270,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/oleiade/reflections v1.1.0 h1:D+I/UsXQB4esMathlt0kkZRJZdUDmhv5zGi/HOwYTWo=
github.com/oleiade/reflections v1.1.0/go.mod h1:mCxx0QseeVCHs5Um5HhJeCKVC7AwS8kO67tky4rdisA=
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
Expand All @@ -289,14 +293,14 @@ github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndr
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/power-devops/perfstat v0.0.0-20220216144756-c35f1ee13d7c h1:NRoLoZvkBTKvR5gQLgA3e0hqjkY9u1wm+iOL45VN/qI=
github.com/power-devops/perfstat v0.0.0-20220216144756-c35f1ee13d7c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI=
github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.54.0 h1:ZlZy0BgJhTwVZUn7dLOkwCZHUkrAqd3WYtcFCWnM1D8=
github.com/prometheus/common v0.54.0/go.mod h1:/TQgMJP5CuVYveyT7n/0Ix8yLNNXy9yRSkhnLTHPDIQ=
github.com/prometheus/procfs v0.15.0 h1:A82kmvXJq2jTu5YUhSGNlYoxh85zLnKgPz4bMZgI5Ek=
github.com/prometheus/procfs v0.15.0/go.mod h1:Y0RJ/Y5g5wJpkTisOtqwDSo4HwhGmLB4VQSw2sQJLHk=
github.com/prometheus/common v0.63.0 h1:YR/EIY1o3mEFP/kZCD7iDMnLPlGyuU2Gb3HIcXnA98k=
github.com/prometheus/common v0.63.0/go.mod h1:VVFF/fBIoToEnWRVkYoXEkq3R3paCoxG9PXP74SnV18=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/puzpuzpuz/xsync/v2 v2.5.1 h1:mVGYAvzDSu52+zaGyNjC+24Xw2bQi3kTr4QJ6N9pIIU=
github.com/puzpuzpuz/xsync/v2 v2.5.1/go.mod h1:gD2H2krq/w52MfPLE+Uy64TzJDVY7lP2znR9qmR35kU=
github.com/qri-io/jsonpointer v0.1.1 h1:prVZBZLL6TW5vsSB9fFHFAMBLI4b0ri5vribQlTJiBA=
Expand Down Expand Up @@ -401,8 +405,8 @@ go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/
go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE=
go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY=
go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg=
go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk=
go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w=
go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o=
go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w=
go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs=
go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc=
go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4=
Expand Down
54 changes: 54 additions & 0 deletions internal/job/tracing.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
"github.com/buildkite/agent/v3/tracetools"
"github.com/buildkite/agent/v3/version"
"github.com/opentracing/opentracing-go"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/expfmt"
"go.opentelemetry.io/contrib/propagators/aws/xray"
"go.opentelemetry.io/contrib/propagators/b3"
"go.opentelemetry.io/contrib/propagators/jaeger"
Expand All @@ -20,7 +22,9 @@ import (
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
prometheusexp "go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/propagation"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
Expand Down Expand Up @@ -147,8 +151,33 @@ func (e *Executor) startTracingOpenTelemetry(ctx context.Context) (tracetools.Sp
attributes = append(attributes, extras...)

resources := resource.NewWithAttributes(semconv.SchemaURL, attributes...)

promExporter, err := prometheusexp.New()
if err != nil {
e.shell.Errorf("Error creating prom metric exporter %s. Disabling tracing.", err)
return &tracetools.NoopSpan{}, ctx, noopStopper
}

meterProvider := sdkmetric.NewMeterProvider(
sdkmetric.WithReader(promExporter),
sdkmetric.WithResource(resources),
)
otel.SetMeterProvider(meterProvider)

batchProcessor := sdktrace.NewBatchSpanProcessor(
// You would configure an actual exporter here if needed
exporter,
)

spanMetricsProcessor, err := tracetools.NewSpanMetricsProcessor(meterProvider, batchProcessor)
if err != nil {
e.shell.Errorf("Error creating OTLP metric exporter %s. Disabling tracing.", err)
return &tracetools.NoopSpan{}, ctx, noopStopper
}

tracerProvider := sdktrace.NewTracerProvider(
sdktrace.WithBatcher(exporter),
sdktrace.WithSpanProcessor(spanMetricsProcessor),
sdktrace.WithResource(resources),
)

Expand All @@ -175,7 +204,12 @@ func (e *Executor) startTracingOpenTelemetry(ctx context.Context) (tracetools.Sp
)

stop := func() {
e.writeGlobalMetricsToStdout()
ctx := context.Background()
_ = spanMetricsProcessor.ForceFlush(ctx)
_ = spanMetricsProcessor.Shutdown(ctx)
_ = meterProvider.ForceFlush(ctx)
_ = meterProvider.Shutdown(ctx)
_ = tracerProvider.ForceFlush(ctx)
_ = tracerProvider.Shutdown(ctx)
}
Expand Down Expand Up @@ -296,3 +330,23 @@ func (e *Executor) implementationSpecificSpanName(otelName, ddName string) strin
return otelName
}
}

func (e *Executor) writeGlobalMetricsToStdout() {

fmt.Printf("Writing global metrics to %s %s %s\n", e.BuildPath, e.AgentName, e.JobID)

// Gather all metrics from the global registry
metricFamilies, err := prometheus.DefaultGatherer.Gather()
if err != nil {
fmt.Printf("Error gathering metrics: %v\n", err)
return
}

// Create an encoder for the text format
encoder := expfmt.NewEncoder(os.Stdout, expfmt.FmtText)

// Encode each metric family
for _, mf := range metricFamilies {
encoder.Encode(mf)
}
}
117 changes: 117 additions & 0 deletions tracetools/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package tracetools

import (
"context"
"fmt"
"sync"
"time"

"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
)

// SpanMetricsProcessor implements a trace processor that generates metrics from spans
type SpanMetricsProcessor struct {
meter metric.Meter
histogramMetric metric.Float64Histogram
counterMetric metric.Int64Counter
errorCounter metric.Int64Counter
mutex sync.Mutex
nextProcessor sdktrace.SpanProcessor
}

// NewSpanMetricsProcessor creates a new SpanMetricsProcessor
func NewSpanMetricsProcessor(mp metric.MeterProvider, nextProcessor sdktrace.SpanProcessor) (*SpanMetricsProcessor, error) {
meter := mp.Meter("span-metrics")

// Create a histogram for span durations
histogram, err := meter.Float64Histogram(
"span.duration",
metric.WithDescription("The duration of spans"),
metric.WithUnit("ms"),
)
if err != nil {
return nil, fmt.Errorf("failed to create duration histogram: %w", err)
}

// Create a counter for span counts
counter, err := meter.Int64Counter(
"span.count",
metric.WithDescription("The number of spans processed"),
metric.WithUnit("1"),
)
if err != nil {
return nil, fmt.Errorf("failed to create span counter: %w", err)
}

// Create a counter for span errors
errorCounter, err := meter.Int64Counter(
"span.errors",
metric.WithDescription("The number of errored spans"),
metric.WithUnit("1"),
)
if err != nil {
return nil, fmt.Errorf("failed to create error counter: %w", err)
}

return &SpanMetricsProcessor{
meter: meter,
histogramMetric: histogram,
counterMetric: counter,
errorCounter: errorCounter,
nextProcessor: nextProcessor,
}, nil
}

// OnStart implements the SpanProcessor interface
func (smp *SpanMetricsProcessor) OnStart(parent context.Context, s sdktrace.ReadWriteSpan) {
if smp.nextProcessor != nil {
smp.nextProcessor.OnStart(parent, s)
}
}

// OnEnd implements the SpanProcessor interface
func (smp *SpanMetricsProcessor) OnEnd(s sdktrace.ReadOnlySpan) {
smp.mutex.Lock()
defer smp.mutex.Unlock()

// Extract relevant attributes from the span
attrs := []attribute.KeyValue{
attribute.String("span.name", s.Name()),
attribute.String("span.kind", s.SpanKind().String()),
}

// Record duration in milliseconds
durationMs := float64(s.EndTime().Sub(s.StartTime())) / float64(time.Millisecond)
smp.histogramMetric.Record(context.Background(), durationMs, metric.WithAttributes(attrs...))

// Record span count
smp.counterMetric.Add(context.Background(), 1, metric.WithAttributes(attrs...))

// Record error count if span has error status
if s.Status().Code == 2 { // Error status
smp.errorCounter.Add(context.Background(), 1, metric.WithAttributes(attrs...))
}

// Pass to next processor if there is one
if smp.nextProcessor != nil {
smp.nextProcessor.OnEnd(s)
}
}

// Shutdown implements the SpanProcessor interface
func (smp *SpanMetricsProcessor) Shutdown(ctx context.Context) error {
if smp.nextProcessor != nil {
return smp.nextProcessor.Shutdown(ctx)
}
return nil
}

// ForceFlush implements the SpanProcessor interface
func (smp *SpanMetricsProcessor) ForceFlush(ctx context.Context) error {
if smp.nextProcessor != nil {
return smp.nextProcessor.ForceFlush(ctx)
}
return nil
}