From 13b64f428ec1d633103ba681adc34ce6b2f31462 Mon Sep 17 00:00:00 2001 From: STRRL Date: Mon, 3 May 2021 16:09:56 +0800 Subject: [PATCH 1/6] feat: new collector about thermal conditions on macos Signed-off-by: STRRL --- collector/therm_darwin.go | 182 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 collector/therm_darwin.go diff --git a/collector/therm_darwin.go b/collector/therm_darwin.go new file mode 100644 index 0000000000..9bca5c91c9 --- /dev/null +++ b/collector/therm_darwin.go @@ -0,0 +1,182 @@ +// Copyright 2021 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// +build !notherm + +package collector + +/* +#cgo LDFLAGS: -framework IOKit -framework CoreFoundation +#include <stdio.h> +#include <CoreFoundation/CoreFoundation.h> +#include <IOKit/IOKitLib.h> +#include <IOKit/pwr_mgt/IOPMLib.h> +#include <IOKit/pwr_mgt/IOPM.h> + +struct ref_with_ret { + CFDictionaryRef ref; + IOReturn ret; +}; + +struct ref_with_ret FetchThermal(); + +struct ref_with_ret FetchThermal() { + CFDictionaryRef ref; + IOReturn ret; + ret = IOPMCopyCPUPowerStatus(&ref); + struct ref_with_ret result = { + ref, + ret, + }; + return result; +} +*/ +import "C" + +import ( + "errors" + "fmt" + "github.com/go-kit/kit/log" + "github.com/prometheus/client_golang/prometheus" + "unsafe" +) + +type thermCollector struct { + cpuSchedulerLimit typedDesc + cpuAvailableCPU typedDesc + cpuSpeedLimit typedDesc + logger log.Logger +} + +const therm = "therm" + +func init() { + registerCollector("therm", defaultEnabled, NewThermCollector) +} + +func NewThermCollector(logger log.Logger) (Collector, error) { + return &thermCollector{ + cpuSchedulerLimit: typedDesc{ + desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, therm, "cpu_scheduler_limit"), + "Represents the percentage (0-100) of CPU time available. 100% at normal operation. The OS may limit this time for a percentage less than 100%.", + nil, + nil), + valueType: prometheus.GaugeValue, + }, + cpuAvailableCPU: typedDesc{ + desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, therm, "cpu_available_cpu"), + "Reflects how many, if any, CPUs have been taken offline. Represented as an integer number of CPUs (0 - Max CPUs).", + nil, + nil, + ), + valueType: prometheus.GaugeValue, + }, + cpuSpeedLimit: typedDesc{ + desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, therm, "cpu_speed_limit"), + "Defines the speed & voltage limits placed on the CPU. 
Represented as a percentage (0-100) of maximum CPU speed.", + nil, + nil, + ), + valueType: prometheus.GaugeValue, + }, + logger: logger, + }, nil +} + +func (c *thermCollector) Update(ch chan<- prometheus.Metric) error { + cpuPowerStatus, err := fetchCPUPowerStatus() + if err != nil { + return err + } + if value, ok := cpuPowerStatus[(string(C.kIOPMCPUPowerLimitSchedulerTimeKey))]; ok { + ch <- c.cpuSchedulerLimit.mustNewConstMetric(float64(value)) + } + if value, ok := cpuPowerStatus[(string(C.kIOPMCPUPowerLimitProcessorCountKey))]; ok { + ch <- c.cpuAvailableCPU.mustNewConstMetric(float64(value)) + } + if value, ok := cpuPowerStatus[(string(C.kIOPMCPUPowerLimitProcessorSpeedKey))]; ok { + ch <- c.cpuSpeedLimit.mustNewConstMetric(float64(value)) + } + return nil +} + +func fetchCPUPowerStatus() (map[string]int, error) { + cfDictRef, _ := C.FetchThermal() + defer func() { + C.CFRelease(C.CFTypeRef(cfDictRef.ref)) + }() + + if C.kIOReturnNotFound == cfDictRef.ret { + return nil, errors.New("no CPU power status has been recorded") + } + + if C.kIOReturnSuccess != cfDictRef.ret { + return nil, fmt.Errorf("no CPU power status with error code 0x%08x", int(cfDictRef.ret)) + } + + // mapping CFDictionary to map + cfDict := CFDict(cfDictRef.ref) + return mappingCFDictToMap(cfDict), nil +} + +type CFDict uintptr + +func mappingCFDictToMap(dict CFDict) map[string]int { + if C.CFNullRef(dict) == C.kCFNull { + return nil + } + cfDict := C.CFDictionaryRef(dict) + + var result map[string]int + count := C.CFDictionaryGetCount(cfDict) + if count > 0 { + keys := make([]C.CFTypeRef, count) + values := make([]C.CFTypeRef, count) + C.CFDictionaryGetKeysAndValues(cfDict, (*unsafe.Pointer)(unsafe.Pointer(&keys[0])), (*unsafe.Pointer)(unsafe.Pointer(&values[0]))) + result = make(map[string]int, count) + for i := C.CFIndex(0); i < count; i++ { + result[mappingCFStringToString(C.CFStringRef(keys[i]))] = mappingCFNumberLongToInt(C.CFNumberRef(values[i])) + } + } + return result +} + +// 
mappingCFStringToString converts a CFStringRef to a string. +func mappingCFStringToString(s C.CFStringRef) string { + p := C.CFStringGetCStringPtr(s, C.kCFStringEncodingUTF8) + if p != nil { + return C.GoString(p) + } + length := C.CFStringGetLength(s) + if length == 0 { + return "" + } + maxBufLen := C.CFStringGetMaximumSizeForEncoding(length, C.kCFStringEncodingUTF8) + if maxBufLen == 0 { + return "" + } + buf := make([]byte, maxBufLen) + var usedBufLen C.CFIndex + _ = C.CFStringGetBytes(s, C.CFRange{0, length}, C.kCFStringEncodingUTF8, C.UInt8(0), C.false, (*C.UInt8)(&buf[0]), maxBufLen, &usedBufLen) + return string(buf[:usedBufLen]) +} + +func mappingCFNumberLongToInt(n C.CFNumberRef) int { + typ := C.CFNumberGetType(n) + var long C.long + C.CFNumberGetValue(n, typ, unsafe.Pointer(&long)) + return int(long) +} From d9044b8e7f6d9b8b51c6adac18d3c9f99cf6b09b Mon Sep 17 00:00:00 2001 From: STRRL Date: Mon, 3 May 2021 16:18:14 +0800 Subject: [PATCH 2/6] chore: comments for NewThermCollector Signed-off-by: STRRL --- collector/therm_darwin.go | 1 + 1 file changed, 1 insertion(+) diff --git a/collector/therm_darwin.go b/collector/therm_darwin.go index 9bca5c91c9..be38a82c72 100644 --- a/collector/therm_darwin.go +++ b/collector/therm_darwin.go @@ -64,6 +64,7 @@ func init() { registerCollector("therm", defaultEnabled, NewThermCollector) } +// NewThermCollector returns a new Collector exposing current CPU power levels. 
func NewThermCollector(logger log.Logger) (Collector, error) { return &thermCollector{ cpuSchedulerLimit: typedDesc{ From da9f499d777ab51517756d6af6914425b14da8f3 Mon Sep 17 00:00:00 2001 From: STRRL Date: Mon, 3 May 2021 16:51:21 +0800 Subject: [PATCH 3/6] chore: rename therm to thermal Signed-off-by: STRRL --- collector/{therm_darwin.go => thermal_darwin.go} | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) rename collector/{therm_darwin.go => thermal_darwin.go} (94%) diff --git a/collector/therm_darwin.go b/collector/thermal_darwin.go similarity index 94% rename from collector/therm_darwin.go rename to collector/thermal_darwin.go index be38a82c72..af23c6278e 100644 --- a/collector/therm_darwin.go +++ b/collector/thermal_darwin.go @@ -58,10 +58,10 @@ type thermCollector struct { logger log.Logger } -const therm = "therm" +const thermal = "thermal" func init() { - registerCollector("therm", defaultEnabled, NewThermCollector) + registerCollector(thermal, defaultEnabled, NewThermCollector) } // NewThermCollector returns a new Collector exposing current CPU power levels. @@ -69,7 +69,7 @@ func NewThermCollector(logger log.Logger) (Collector, error) { return &thermCollector{ cpuSchedulerLimit: typedDesc{ desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, therm, "cpu_scheduler_limit"), + prometheus.BuildFQName(namespace, thermal, "cpu_scheduler_limit"), "Represents the percentage (0-100) of CPU time available. 100% at normal operation. The OS may limit this time for a percentage less than 100%.", nil, nil), @@ -77,7 +77,7 @@ func NewThermCollector(logger log.Logger) (Collector, error) { }, cpuAvailableCPU: typedDesc{ desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, therm, "cpu_available_cpu"), + prometheus.BuildFQName(namespace, thermal, "cpu_available_cpu"), "Reflects how many, if any, CPUs have been taken offline. 
Represented as an integer number of CPUs (0 - Max CPUs).", nil, nil, @@ -86,7 +86,7 @@ func NewThermCollector(logger log.Logger) (Collector, error) { }, cpuSpeedLimit: typedDesc{ desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, therm, "cpu_speed_limit"), + prometheus.BuildFQName(namespace, thermal, "cpu_speed_limit"), "Defines the speed & voltage limits placed on the CPU. Represented as a percentage (0-100) of maximum CPU speed.", nil, nil, From 4c9a54bd29446bebccab5c200b92fdda39ce90c6 Mon Sep 17 00:00:00 2001 From: STRRL Date: Mon, 3 May 2021 16:52:24 +0800 Subject: [PATCH 4/6] feat: use 0-1 ratio instead of 0-100 percentage Signed-off-by: STRRL --- collector/thermal_darwin.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/collector/thermal_darwin.go b/collector/thermal_darwin.go index af23c6278e..b35bc2e091 100644 --- a/collector/thermal_darwin.go +++ b/collector/thermal_darwin.go @@ -69,7 +69,7 @@ func NewThermCollector(logger log.Logger) (Collector, error) { return &thermCollector{ cpuSchedulerLimit: typedDesc{ desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, thermal, "cpu_scheduler_limit"), + prometheus.BuildFQName(namespace, thermal, "cpu_scheduler_limit_ratio"), "Represents the percentage (0-100) of CPU time available. 100% at normal operation. The OS may limit this time for a percentage less than 100%.", nil, nil), @@ -86,7 +86,7 @@ func NewThermCollector(logger log.Logger) (Collector, error) { }, cpuSpeedLimit: typedDesc{ desc: prometheus.NewDesc( - prometheus.BuildFQName(namespace, thermal, "cpu_speed_limit"), + prometheus.BuildFQName(namespace, thermal, "cpu_speed_limit_ratio"), "Defines the speed & voltage limits placed on the CPU. 
Represented as a percentage (0-100) of maximum CPU speed.", nil, nil, @@ -103,13 +103,13 @@ func (c *thermCollector) Update(ch chan<- prometheus.Metric) error { return err } if value, ok := cpuPowerStatus[(string(C.kIOPMCPUPowerLimitSchedulerTimeKey))]; ok { - ch <- c.cpuSchedulerLimit.mustNewConstMetric(float64(value)) + ch <- c.cpuSchedulerLimit.mustNewConstMetric(float64(value / 100.0)) } if value, ok := cpuPowerStatus[(string(C.kIOPMCPUPowerLimitProcessorCountKey))]; ok { ch <- c.cpuAvailableCPU.mustNewConstMetric(float64(value)) } if value, ok := cpuPowerStatus[(string(C.kIOPMCPUPowerLimitProcessorSpeedKey))]; ok { - ch <- c.cpuSpeedLimit.mustNewConstMetric(float64(value)) + ch <- c.cpuSpeedLimit.mustNewConstMetric(float64(value / 100.0)) } return nil } From 83a6e3ca9cf05dcc44f48cbe56bcbf5d68567dbc Mon Sep 17 00:00:00 2001 From: STRRL Date: Mon, 3 May 2021 18:20:06 +0800 Subject: [PATCH 5/6] chore: address the comments Signed-off-by: STRRL --- collector/thermal_darwin.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/collector/thermal_darwin.go b/collector/thermal_darwin.go index b35bc2e091..da6048e9d1 100644 --- a/collector/thermal_darwin.go +++ b/collector/thermal_darwin.go @@ -103,13 +103,13 @@ func (c *thermCollector) Update(ch chan<- prometheus.Metric) error { return err } if value, ok := cpuPowerStatus[(string(C.kIOPMCPUPowerLimitSchedulerTimeKey))]; ok { - ch <- c.cpuSchedulerLimit.mustNewConstMetric(float64(value / 100.0)) + ch <- c.cpuSchedulerLimit.mustNewConstMetric(float64(value) / 100.0) } if value, ok := cpuPowerStatus[(string(C.kIOPMCPUPowerLimitProcessorCountKey))]; ok { ch <- c.cpuAvailableCPU.mustNewConstMetric(float64(value)) } if value, ok := cpuPowerStatus[(string(C.kIOPMCPUPowerLimitProcessorSpeedKey))]; ok { - ch <- c.cpuSpeedLimit.mustNewConstMetric(float64(value / 100.0)) + ch <- c.cpuSpeedLimit.mustNewConstMetric(float64(value) / 100.0) } return nil } From 81582e1790dff3f31d0f4e171e572eb6b2fee294 Mon Sep 17 
00:00:00 2001 From: STRRL Date: Tue, 20 Jul 2021 11:11:57 +0800 Subject: [PATCH 6/6] chore: appending description about thermal in README Signed-off-by: STRRL --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index a1539bc4c5..fabd9b1b32 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,7 @@ sockstat | Exposes various statistics from `/proc/net/sockstat`. | Linux softnet | Exposes statistics from `/proc/net/softnet_stat`. | Linux stat | Exposes various statistics from `/proc/stat`. This includes boot time, forks and interrupts. | Linux textfile | Exposes statistics read from local disk. The `--collector.textfile.directory` flag must be set. | _any_ +thermal | Exposes thermal statistics like `pmset -g therm`. | Darwin thermal\_zone | Exposes thermal zone & cooling device statistics from `/sys/class/thermal`. | Linux time | Exposes the current system time. | _any_ timex | Exposes selected adjtimex(2) system call stats. | Linux