|
3 | 3 | package agent |
4 | 4 |
|
5 | 5 | import ( |
| 6 | + "context" |
6 | 7 | "net/http" |
7 | 8 | "strings" |
| 9 | + "sync" |
8 | 10 |
|
9 | 11 | "github.com/prometheus/client_golang/prometheus" |
10 | 12 | "github.com/prometheus/client_golang/prometheus/promhttp" |
| 13 | + "go.opentelemetry.io/otel/attribute" |
| 14 | + "go.opentelemetry.io/otel/metric" |
11 | 15 | ) |
12 | 16 |
|
// metricsPort is the listen address (port only) for the agent's
// /metrics HTTP endpoint.
const metricsPort = ":9090"
14 | 18 |
|
// vmStateEntry records the currently-active lifecycle state of one VM
// and the node it runs on. Exactly one entry exists per VM key, which is
// what guarantees a single active state series per VM.
type vmStateEntry struct {
	state string
	node  string
}

// guestData holds the most recent guest-agent readings for one VM.
type guestData struct {
	cpu   float64 // CPU usage ratio (0.0–1.0, per the exported metric's help text)
	mem   float64 // memory used, bytes
	disk  float64 // root disk used, bytes
	node  string  // node hosting the VM; exported as the "node" attribute
	class string  // exported as the "impvmclass" attribute
}
| 31 | + |
// VMMetricsCollector holds per-VM metric state for the node agent.
//
// State lives in plain maps guarded by mu; the OTel observable-gauge
// callbacks registered at construction time read these maps on every
// scrape, so Set*/Clear* methods only mutate state and never touch the
// metrics pipeline directly.
type VMMetricsCollector struct {
	mu           sync.RWMutex
	vmStates     map[string]vmStateEntry // "ns/name" → {state, node}
	guestMetrics map[string]*guestData   // "ns/name" → data
	gatherer     prometheus.Gatherer     // registry backing the /metrics handler
}
23 | 39 |
|
24 | | -// NewVMMetricsCollector creates a new collector with its own registry. |
25 | | -func NewVMMetricsCollector() *VMMetricsCollector { |
26 | | - reg := prometheus.NewRegistry() |
| 40 | +// NewVMMetricsCollector creates a new collector using the provided OTel meter. |
| 41 | +// gatherer is the Prometheus registry used by the OTel Prometheus exporter; |
| 42 | +// it is used to serve the /metrics HTTP handler. |
| 43 | +func NewVMMetricsCollector(meter metric.Meter, gatherer prometheus.Gatherer) *VMMetricsCollector { |
27 | 44 | c := &VMMetricsCollector{ |
28 | | - vmState: prometheus.NewGaugeVec(prometheus.GaugeOpts{ |
29 | | - Name: "imp_vm_state", |
30 | | - Help: "Current VM state (1 = active state).", |
31 | | - }, []string{"impvm", "namespace", "node", "state"}), |
32 | | - guestCPU: prometheus.NewGaugeVec(prometheus.GaugeOpts{ |
33 | | - Name: "imp_vm_guest_cpu_usage_ratio", |
34 | | - Help: "Guest VM CPU usage ratio (0.0–1.0).", |
35 | | - }, []string{"impvm", "namespace", "node", "impvmclass"}), |
36 | | - guestMem: prometheus.NewGaugeVec(prometheus.GaugeOpts{ |
37 | | - Name: "imp_vm_guest_memory_used_bytes", |
38 | | - Help: "Guest VM memory used bytes.", |
39 | | - }, []string{"impvm", "namespace", "node", "impvmclass"}), |
40 | | - guestDisk: prometheus.NewGaugeVec(prometheus.GaugeOpts{ |
41 | | - Name: "imp_vm_guest_disk_used_bytes", |
42 | | - Help: "Guest VM root disk used bytes.", |
43 | | - }, []string{"impvm", "namespace", "node", "impvmclass"}), |
44 | | - reg: reg, |
| 45 | + vmStates: make(map[string]vmStateEntry), |
| 46 | + guestMetrics: make(map[string]*guestData), |
| 47 | + gatherer: gatherer, |
45 | 48 | } |
46 | | - reg.MustRegister(c.vmState, c.guestCPU, c.guestMem, c.guestDisk) |
47 | | - reg.MustRegister(prometheus.NewGoCollector(), prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{})) |
| 49 | + |
| 50 | + _, _ = meter.Float64ObservableGauge( |
| 51 | + "imp_vm_state", |
| 52 | + metric.WithDescription("Current VM state (1 = active state)."), |
| 53 | + metric.WithFloat64Callback(func(_ context.Context, o metric.Float64Observer) error { |
| 54 | + c.mu.RLock() |
| 55 | + defer c.mu.RUnlock() |
| 56 | + for key, entry := range c.vmStates { |
| 57 | + ns, name := splitKey(key) |
| 58 | + o.Observe(1, metric.WithAttributes( |
| 59 | + attribute.String("impvm", name), |
| 60 | + attribute.String("namespace", ns), |
| 61 | + attribute.String("node", entry.node), |
| 62 | + attribute.String("state", entry.state), |
| 63 | + )) |
| 64 | + } |
| 65 | + return nil |
| 66 | + }), |
| 67 | + ) |
| 68 | + |
| 69 | + _, _ = meter.Float64ObservableGauge( |
| 70 | + "imp_vm_guest_cpu_usage_ratio", |
| 71 | + metric.WithDescription("Guest VM CPU usage ratio (0.0–1.0)."), |
| 72 | + metric.WithFloat64Callback(func(_ context.Context, o metric.Float64Observer) error { |
| 73 | + c.mu.RLock() |
| 74 | + defer c.mu.RUnlock() |
| 75 | + for key, d := range c.guestMetrics { |
| 76 | + ns, name := splitKey(key) |
| 77 | + o.Observe(d.cpu, metric.WithAttributes( |
| 78 | + attribute.String("impvm", name), |
| 79 | + attribute.String("namespace", ns), |
| 80 | + attribute.String("node", d.node), |
| 81 | + attribute.String("impvmclass", d.class), |
| 82 | + )) |
| 83 | + } |
| 84 | + return nil |
| 85 | + }), |
| 86 | + ) |
| 87 | + |
| 88 | + _, _ = meter.Float64ObservableGauge( |
| 89 | + "imp_vm_guest_memory_used_bytes", |
| 90 | + metric.WithDescription("Guest VM memory used bytes."), |
| 91 | + metric.WithFloat64Callback(func(_ context.Context, o metric.Float64Observer) error { |
| 92 | + c.mu.RLock() |
| 93 | + defer c.mu.RUnlock() |
| 94 | + for key, d := range c.guestMetrics { |
| 95 | + ns, name := splitKey(key) |
| 96 | + o.Observe(d.mem, metric.WithAttributes( |
| 97 | + attribute.String("impvm", name), |
| 98 | + attribute.String("namespace", ns), |
| 99 | + attribute.String("node", d.node), |
| 100 | + attribute.String("impvmclass", d.class), |
| 101 | + )) |
| 102 | + } |
| 103 | + return nil |
| 104 | + }), |
| 105 | + ) |
| 106 | + |
| 107 | + _, _ = meter.Float64ObservableGauge( |
| 108 | + "imp_vm_guest_disk_used_bytes", |
| 109 | + metric.WithDescription("Guest VM root disk used bytes."), |
| 110 | + metric.WithFloat64Callback(func(_ context.Context, o metric.Float64Observer) error { |
| 111 | + c.mu.RLock() |
| 112 | + defer c.mu.RUnlock() |
| 113 | + for key, d := range c.guestMetrics { |
| 114 | + ns, name := splitKey(key) |
| 115 | + o.Observe(d.disk, metric.WithAttributes( |
| 116 | + attribute.String("impvm", name), |
| 117 | + attribute.String("namespace", ns), |
| 118 | + attribute.String("node", d.node), |
| 119 | + attribute.String("impvmclass", d.class), |
| 120 | + )) |
| 121 | + } |
| 122 | + return nil |
| 123 | + }), |
| 124 | + ) |
| 125 | + |
48 | 126 | return c |
49 | 127 | } |
50 | 128 |
|
51 | | -// SetVMState sets the imp_vm_state gauge for a VM. key = "namespace/name". |
52 | | -// Clears any previous state series for this VM so only one state is active at a time. |
| 129 | +// SetVMState sets the current state for a VM. Only one state is active per VM at a time. |
53 | 130 | func (c *VMMetricsCollector) SetVMState(key, state, node string) { |
54 | | - ns, name := splitKey(key) |
55 | | - // Remove stale state series before setting the new one to avoid double-counting. |
56 | | - c.vmState.DeletePartialMatch(prometheus.Labels{"impvm": name, "namespace": ns, "node": node}) |
57 | | - c.vmState.WithLabelValues(name, ns, node, state).Set(1) |
| 131 | + c.mu.Lock() |
| 132 | + defer c.mu.Unlock() |
| 133 | + c.vmStates[key] = vmStateEntry{state: state, node: node} |
58 | 134 | } |
59 | 135 |
|
60 | 136 | // SetGuestMetrics updates guest agent metrics for a VM. |
61 | 137 | func (c *VMMetricsCollector) SetGuestMetrics(key, node, impvmclass string, cpu float64, mem, disk int64) { |
62 | | - ns, name := splitKey(key) |
63 | | - c.guestCPU.WithLabelValues(name, ns, node, impvmclass).Set(cpu) |
64 | | - c.guestMem.WithLabelValues(name, ns, node, impvmclass).Set(float64(mem)) |
65 | | - c.guestDisk.WithLabelValues(name, ns, node, impvmclass).Set(float64(disk)) |
| 138 | + c.mu.Lock() |
| 139 | + defer c.mu.Unlock() |
| 140 | + c.guestMetrics[key] = &guestData{ |
| 141 | + cpu: cpu, |
| 142 | + mem: float64(mem), |
| 143 | + disk: float64(disk), |
| 144 | + node: node, |
| 145 | + class: impvmclass, |
| 146 | + } |
66 | 147 | } |
67 | 148 |
|
68 | | -// ClearVM removes all metric series for a VM when it's deleted. |
| 149 | +// ClearVM removes all metric state for a VM when it is deleted. |
69 | 150 | func (c *VMMetricsCollector) ClearVM(key string) { |
70 | | - ns, name := splitKey(key) |
71 | | - c.vmState.DeletePartialMatch(prometheus.Labels{"impvm": name, "namespace": ns}) |
72 | | - c.guestCPU.DeletePartialMatch(prometheus.Labels{"impvm": name, "namespace": ns}) |
73 | | - c.guestMem.DeletePartialMatch(prometheus.Labels{"impvm": name, "namespace": ns}) |
74 | | - c.guestDisk.DeletePartialMatch(prometheus.Labels{"impvm": name, "namespace": ns}) |
75 | | -} |
76 | | - |
77 | | -// NewMetricsHandler returns an HTTP handler for the default Prometheus registry. |
78 | | -func NewMetricsHandler() http.Handler { |
79 | | - return promhttp.Handler() |
| 151 | + c.mu.Lock() |
| 152 | + defer c.mu.Unlock() |
| 153 | + delete(c.vmStates, key) |
| 154 | + delete(c.guestMetrics, key) |
80 | 155 | } |
81 | 156 |
|
82 | | -// NewMetricsHandlerWithCollector returns an HTTP handler for the given collector's registry. |
| 157 | +// NewMetricsHandlerWithCollector returns an HTTP handler for the collector's Prometheus registry. |
83 | 158 | func NewMetricsHandlerWithCollector(c *VMMetricsCollector) http.Handler { |
84 | | - return promhttp.HandlerFor(c.reg, promhttp.HandlerOpts{}) |
| 159 | + return promhttp.HandlerFor(c.gatherer, promhttp.HandlerOpts{}) |
85 | 160 | } |
86 | 161 |
|
87 | 162 | func splitKey(key string) (ns, name string) { |
|
0 commit comments