Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions collector/fixtures/e2e-64k-page-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,11 @@ node_infiniband_multicast_packets_received_total{device="mlx4_0",port="2"} 0
# TYPE node_infiniband_multicast_packets_transmitted_total counter
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0
# HELP node_infiniband_physical_state_id Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)
# TYPE node_infiniband_physical_state_id gauge
node_infiniband_physical_state_id{device="i40iw0",port="1"} 5
node_infiniband_physical_state_id{device="mlx4_0",port="1"} 5
node_infiniband_physical_state_id{device="mlx4_0",port="2"} 5
# HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded
# TYPE node_infiniband_port_constraint_errors_received_total counter
node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0
Expand Down Expand Up @@ -872,6 +877,16 @@ node_infiniband_port_packets_transmitted_total{device="mlx4_0",port="1"} 6.23586
# HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick
# TYPE node_infiniband_port_transmit_wait_total counter
node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09
# HELP node_infiniband_rate_bytes_per_second Maximum signal transfer rate
# TYPE node_infiniband_rate_bytes_per_second gauge
node_infiniband_rate_bytes_per_second{device="i40iw0",port="1"} 1.25e+09
node_infiniband_rate_bytes_per_second{device="mlx4_0",port="1"} 5e+09
node_infiniband_rate_bytes_per_second{device="mlx4_0",port="2"} 5e+09
# HELP node_infiniband_state_id State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)
# TYPE node_infiniband_state_id gauge
node_infiniband_state_id{device="i40iw0",port="1"} 4
node_infiniband_state_id{device="mlx4_0",port="1"} 4
node_infiniband_state_id{device="mlx4_0",port="2"} 4
# HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors)
# TYPE node_infiniband_unicast_packets_received_total counter
node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148
Expand Down
15 changes: 15 additions & 0 deletions collector/fixtures/e2e-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,11 @@ node_infiniband_multicast_packets_received_total{device="mlx4_0",port="2"} 0
# TYPE node_infiniband_multicast_packets_transmitted_total counter
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16
node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0
# HELP node_infiniband_physical_state_id Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)
# TYPE node_infiniband_physical_state_id gauge
node_infiniband_physical_state_id{device="i40iw0",port="1"} 5
node_infiniband_physical_state_id{device="mlx4_0",port="1"} 5
node_infiniband_physical_state_id{device="mlx4_0",port="2"} 5
# HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded
# TYPE node_infiniband_port_constraint_errors_received_total counter
node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0
Expand Down Expand Up @@ -872,6 +877,16 @@ node_infiniband_port_packets_transmitted_total{device="mlx4_0",port="1"} 6.23586
# HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick
# TYPE node_infiniband_port_transmit_wait_total counter
node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09
# HELP node_infiniband_rate_bytes_per_second Maximum signal transfer rate
# TYPE node_infiniband_rate_bytes_per_second gauge
node_infiniband_rate_bytes_per_second{device="i40iw0",port="1"} 1.25e+09
node_infiniband_rate_bytes_per_second{device="mlx4_0",port="1"} 5e+09
node_infiniband_rate_bytes_per_second{device="mlx4_0",port="2"} 5e+09
# HELP node_infiniband_state_id State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)
# TYPE node_infiniband_state_id gauge
node_infiniband_state_id{device="i40iw0",port="1"} 4
node_infiniband_state_id{device="mlx4_0",port="1"} 4
node_infiniband_state_id{device="mlx4_0",port="2"} 4
# HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors)
# TYPE node_infiniband_unicast_packets_received_total counter
node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148
Expand Down
7 changes: 7 additions & 0 deletions collector/infiniband_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ func NewInfiniBandCollector() (Collector, error) {
"link_error_recovery_total": "Number of times the link successfully recovered from an error state",
"multicast_packets_received_total": "Number of multicast packets received (including errors)",
"multicast_packets_transmitted_total": "Number of multicast packets transmitted (including errors)",
"physical_state_id": "Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)",
"port_constraint_errors_received_total": "Number of packets received on the switch physical port that are discarded",
"port_constraint_errors_transmitted_total": "Number of packets not transmitted from the switch physical port",
"port_data_received_bytes_total": "Number of data octets received on all links",
Expand All @@ -67,6 +68,8 @@ func NewInfiniBandCollector() (Collector, error) {
"port_packets_received_total": "Number of packets received on all VLs by this port (including errors)",
"port_packets_transmitted_total": "Number of packets transmitted on all VLs from this port (including errors)",
"port_transmit_wait_total": "Number of ticks during which the port had data to transmit but no data was sent during the entire tick",
"rate_bytes_per_second": "Maximum signal transfer rate",
"state_id": "State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)",
"unicast_packets_received_total": "Number of unicast packets received (including errors)",
"unicast_packets_transmitted_total": "Number of unicast packets transmitted (including errors)",
}
Expand Down Expand Up @@ -105,6 +108,10 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error {
for _, port := range device.Ports {
portStr := strconv.FormatUint(uint64(port.Port), 10)

c.pushMetric(ch, "state_id", uint64(port.StateID), port.Name, portStr, prometheus.GaugeValue)
c.pushMetric(ch, "physical_state_id", uint64(port.PhysStateID), port.Name, portStr, prometheus.GaugeValue)
c.pushMetric(ch, "rate_bytes_per_second", port.Rate, port.Name, portStr, prometheus.GaugeValue)

c.pushCounter(ch, "legacy_multicast_packets_received_total", port.Counters.LegacyPortMulticastRcvPackets, port.Name, portStr)
c.pushCounter(ch, "legacy_multicast_packets_transmitted_total", port.Counters.LegacyPortMulticastXmitPackets, port.Name, portStr)
c.pushCounter(ch, "legacy_data_received_bytes_total", port.Counters.LegacyPortRcvData64, port.Name, portStr)
Expand Down