From 66c542f6eca7d6dad99b8757e6f8806a0514715a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 24 Apr 2023 14:16:42 -0500 Subject: [PATCH 1/2] netlink: read missing attributes from sysfs Read dev_id, name_assign_type, and addr_assign_type from sysfs even when the netlink collector is used, since Netlink does not expose them. Reading these attributes takes only a device-specific lock rather than the whole RTNL lock, so it has much less impact on other processes on the system. Signed-off-by: Dan Williams --- collector/netclass_rtnl_linux.go | 86 +++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/collector/netclass_rtnl_linux.go b/collector/netclass_rtnl_linux.go index aecf27aeeb..051ed7466b 100644 --- a/collector/netclass_rtnl_linux.go +++ b/collector/netclass_rtnl_linux.go @@ -17,14 +17,19 @@ package collector import ( + "bytes" "errors" "fmt" "io/fs" + "os" + "path/filepath" + "strconv" "github.com/go-kit/log/level" "github.com/jsimonetti/rtnetlink" "github.com/mdlayher/ethtool" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/procfs/sysfs" "gopkg.in/alecthomas/kingpin.v2" ) @@ -57,14 +62,27 @@ func (c *netClassCollector) netClassRTNLUpdate(ch chan<- prometheus.Metric) erro } } + // Get most attributes from Netlink lMsgs, err := c.getNetClassInfoRTNL() if err != nil { return fmt.Errorf("could not get net class info: %w", err) } + + relevantLinks := make([]rtnetlink.LinkMessage, 0, len(lMsgs)) for _, msg := range lMsgs { - if c.ignoredDevicesPattern.MatchString(msg.Attributes.Name) { - continue + if !c.ignoredDevicesPattern.MatchString(msg.Attributes.Name) { + relevantLinks = append(relevantLinks, msg) } + } + + // Read sysfs for attributes that Netlink doesn't expose + sysfsAttrs, err := getSysfsAttributes(relevantLinks) + if err != nil { + return fmt.Errorf("could not get sysfs device info: %w", err) + } + + // Parse all the info and update metrics + for _, msg := range relevantLinks { upDesc := prometheus.NewDesc( prometheus.BuildFQName(namespace, c.subsystem, 
"up"), "Value is 1 if operstate is 'up', 0 otherwise.", @@ -96,12 +114,16 @@ func (c *netClassCollector) netClassRTNLUpdate(ch chan<- prometheus.Metric) erro duplex = lm.Duplex.String() } + ifaceInfo := sysfsAttrs[msg.Attributes.Name] + ch <- prometheus.MustNewConstMetric(infoDesc, prometheus.GaugeValue, infoValue, msg.Attributes.Name, msg.Attributes.Address.String(), msg.Attributes.Broadcast.String(), duplex, operstateStr[int(msg.Attributes.OperationalState)], ifalias) + pushMetric(ch, c.getFieldDesc("address_assign_type"), "address_assign_type", ifaceInfo.AddrAssignType, prometheus.GaugeValue, msg.Attributes.Name) pushMetric(ch, c.getFieldDesc("carrier"), "carrier", msg.Attributes.Carrier, prometheus.GaugeValue, msg.Attributes.Name) pushMetric(ch, c.getFieldDesc("carrier_changes_total"), "carrier_changes_total", msg.Attributes.CarrierChanges, prometheus.CounterValue, msg.Attributes.Name) pushMetric(ch, c.getFieldDesc("carrier_up_changes_total"), "carrier_up_changes_total", msg.Attributes.CarrierUpCount, prometheus.CounterValue, msg.Attributes.Name) pushMetric(ch, c.getFieldDesc("carrier_down_changes_total"), "carrier_down_changes_total", msg.Attributes.CarrierDownCount, prometheus.CounterValue, msg.Attributes.Name) + pushMetric(ch, c.getFieldDesc("device_id"), "device_id", ifaceInfo.DevID, prometheus.GaugeValue, msg.Attributes.Name) pushMetric(ch, c.getFieldDesc("flags"), "flags", msg.Flags, prometheus.GaugeValue, msg.Attributes.Name) pushMetric(ch, c.getFieldDesc("iface_id"), "iface_id", msg.Index, prometheus.GaugeValue, msg.Attributes.Name) pushMetric(ch, c.getFieldDesc("iface_link_mode"), "iface_link_mode", msg.Attributes.LinkMode, prometheus.GaugeValue, msg.Attributes.Name) @@ -117,6 +139,7 @@ func (c *netClassCollector) netClassRTNLUpdate(ch chan<- prometheus.Metric) erro } pushMetric(ch, c.getFieldDesc("mtu_bytes"), "mtu_bytes", msg.Attributes.MTU, prometheus.GaugeValue, msg.Attributes.Name) + pushMetric(ch, c.getFieldDesc("name_assign_type"), 
"name_assign_type", ifaceInfo.NameAssignType, prometheus.GaugeValue, msg.Attributes.Name) pushMetric(ch, c.getFieldDesc("net_dev_group"), "net_dev_group", msg.Attributes.NetDevGroup, prometheus.GaugeValue, msg.Attributes.Name) pushMetric(ch, c.getFieldDesc("transmit_queue_length"), "transmit_queue_length", msg.Attributes.TxQueueLen, prometheus.GaugeValue, msg.Attributes.Name) pushMetric(ch, c.getFieldDesc("protocol_type"), "protocol_type", msg.Type, prometheus.GaugeValue, msg.Attributes.Name) @@ -186,3 +209,62 @@ func (c *netClassCollector) getLinkModes() ([]*ethtool.LinkMode, error) { return lms, err } + +// getSysfsAttributes reads attributes that are absent from netlink but provided +// by sysfs. +func getSysfsAttributes(links []rtnetlink.LinkMessage) (sysfs.NetClass, error) { + netClass := sysfs.NetClass{} + for _, msg := range links { + var err error + interfaceClass := sysfs.NetClassIface{} + ifName := msg.Attributes.Name + + // These three attributes hold a device-specific lock when + // accessed, not the RTNL lock, so they are much less impactful + // than reading most of the other attributes from sysfs. 
+ interfaceClass.AddrAssignType, err = getSysfsAttrAsInt64(ifName, "addr_assign_type") + if err != nil { + return nil, err + } + interfaceClass.DevID, err = getSysfsAttrAsInt64(ifName, "dev_id") + if err != nil { + return nil, err + } + interfaceClass.NameAssignType, err = getSysfsAttrAsInt64(ifName, "name_assign_type") + if err != nil { + return nil, err + } + + netClass[ifName] = interfaceClass + } + return netClass, nil +} + +func getSysfsAttrAsInt64(linkName, attr string) (*int64, error) { + name := filepath.Join("/sys", "class", "net", linkName, attr) + data, err := os.ReadFile(name) + if err != nil { + var perr *fs.PathError + // Ignore certain errors we know aren't fatal; same as + // prometheus's sysfs class does + if os.IsNotExist(err) || os.IsPermission(err) { + return nil, nil + } else if errors.As(err, &perr) { + realErr := perr.Unwrap() + if realErr.Error() == "operation not supported" || realErr.Error() == "invalid argument" { + return nil, nil + } + } + return nil, fmt.Errorf("failed to read file %q: %w", name, err) + } + + // base 0 automatically handles number type prefixes (hex, octal, etc) + const base = 0 + val, err := strconv.ParseInt(string(bytes.TrimSpace(data)), base, 64) + if err != nil { + // Ignore the error; can't do much with it and an error + // parsing a single attribute shouldn't stop parsing others + return nil, nil + } + return &val, nil +} From 5695a6ac34a28278884fad5e5bc1034981b0dd24 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 25 Apr 2023 07:46:35 -0500 Subject: [PATCH 2/2] force netlink collector --- collector/netclass_linux.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/collector/netclass_linux.go b/collector/netclass_linux.go index 8f2b0d22b4..523422ba42 100644 --- a/collector/netclass_linux.go +++ b/collector/netclass_linux.go @@ -64,10 +64,7 @@ func NewNetClassCollector(logger log.Logger) (Collector, error) { } func (c *netClassCollector) Update(ch chan<- prometheus.Metric) error { - if 
*netclassNetlink { - return c.netClassRTNLUpdate(ch) - } - return c.netClassSysfsUpdate(ch) + return c.netClassRTNLUpdate(ch) } func (c *netClassCollector) netClassSysfsUpdate(ch chan<- prometheus.Metric) error {