From a574afdf5f19efe8498a66a87feb4623cf843925 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Mon, 13 Oct 2025 15:19:43 -0700 Subject: [PATCH 1/5] kernel utilization metrics on EC2 AL2023 w/ 6.1 kernel Signed-off-by: Harper, Jason M --- cmd/metrics/loader_perfmon_group_core.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cmd/metrics/loader_perfmon_group_core.go b/cmd/metrics/loader_perfmon_group_core.go index 24ac150a..33aafa27 100644 --- a/cmd/metrics/loader_perfmon_group_core.go +++ b/cmd/metrics/loader_perfmon_group_core.go @@ -241,6 +241,17 @@ func (group *CoreGroup) AddEvent(event CoreEvent, reorder bool, metadata Metadat validCounters += fmt.Sprintf("%d,", i) } } + // When the fixed ref cycles counter is not supported, we cannot put CPU_CLK_UNHALTED.REF_TSC (ref-cycles) and + // CPU_CLK_UNHALTED.REF_TSC_P:SUP (ref-cycles:k) in the same group. + // Note: this was discovered through testing on AWS m7i.8xlarge instances with Amazon Linux 2023 w/ kernel + // 6.1. The same platform but with kernel 6.12 supports the fixed ref cycles counter and doesn't have this limitation. 
+ if !metadata.SupportsFixedRefCycles && strings.HasPrefix(event.EventName, "CPU_CLK_UNHALTED.REF_TSC") { + for _, existingEvent := range group.GeneralPurposeCounters { + if strings.HasPrefix(existingEvent.EventName, "CPU_CLK_UNHALTED.REF_TSC") { + return fmt.Errorf("cannot add %s to group as it contains %s and fixed reference cycles are not supported", event.EventName, existingEvent.EventName) + } + } + } // otherwise, it is a general purpose event, check if we can place it in one of the general purpose counters for i := range group.GeneralPurposeCounters { if counter := group.GeneralPurposeCounters[i]; counter.IsEmpty() { From e0427dd995a8a27953b28f21ccbab2a1b86a88b5 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Mon, 13 Oct 2025 21:01:49 -0700 Subject: [PATCH 2/5] fix ICX metric definition Signed-off-by: Harper, Jason M --- cmd/metrics/resources/perfmon/icx/icx_perfspect_metrics.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/metrics/resources/perfmon/icx/icx_perfspect_metrics.json b/cmd/metrics/resources/perfmon/icx/icx_perfspect_metrics.json index 127d7169..6ed93434 100644 --- a/cmd/metrics/resources/perfmon/icx/icx_perfspect_metrics.json +++ b/cmd/metrics/resources/perfmon/icx/icx_perfspect_metrics.json @@ -9,7 +9,7 @@ "BriefDescription": "CPU utilization percentage in kernel mode", "Events": [ { - "Name": "CPU_CLK_UNHALTED.REF_TSC_P:SUP", + "Name": "CPU_CLK_UNHALTED.REF_TSC:SUP", "Alias": "a" }, { From 53076dfc7efa06920ff3a60d5e72a44fcf033cb3 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Mon, 13 Oct 2025 21:02:08 -0700 Subject: [PATCH 3/5] icx metric event translation to perf Signed-off-by: Harper, Jason M --- cmd/metrics/loader_perfmon_event_core.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/metrics/loader_perfmon_event_core.go b/cmd/metrics/loader_perfmon_event_core.go index 32e21a89..6d73ddf5 100644 --- a/cmd/metrics/loader_perfmon_event_core.go +++ b/cmd/metrics/loader_perfmon_event_core.go @@ -145,6 
+145,7 @@ var fixedCounterEventNameTranslation = map[string]string{ "CPU_CLK_UNHALTED.THREAD_P:SUP": "cpu-cycles:k", "CPU_CLK_UNHALTED.CORE_P:SUP": "cpu-cycles:k", // srf - thread and core are the same "CPU_CLK_UNHALTED.REF_TSC": "ref-cycles", + "CPU_CLK_UNHALTED.REF_TSC:SUP": "ref-cycles:k", "CPU_CLK_UNHALTED.REF_TSC_P:SUP": "ref-cycles:k", "TOPDOWN.SLOTS:perf_metrics": "topdown.slots", "PERF_METRICS.BAD_SPECULATION": "topdown-bad-spec", From 7166d2d7caa4b24df1762bf4f69c3733c422c3b6 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Mon, 13 Oct 2025 21:03:10 -0700 Subject: [PATCH 4/5] make deterministic expression parsing to replace fixed counter event names with corresponding perf event names Signed-off-by: Harper, Jason M --- cmd/metrics/loader_perfmon.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cmd/metrics/loader_perfmon.go b/cmd/metrics/loader_perfmon.go index f997e18d..cbf45317 100644 --- a/cmd/metrics/loader_perfmon.go +++ b/cmd/metrics/loader_perfmon.go @@ -330,9 +330,17 @@ func getExpression(perfmonMetric PerfmonMetric) (string, error) { expression = strings.ReplaceAll(expression, commonEvent, alias) } // replace fixed counter perfmon event names with their corresponding perf event names - for perfmonEventName, perfEventName := range fixedCounterEventNameTranslation { - // Replace event name as whole words only (not substrings) - expression = util.ReplaceWholeWord(expression, perfmonEventName, perfEventName) + // example: "100 * ([ref-cycles:k] / [TSC])" + // parse out the list of events/variables from the expression + expressionVars := regexp.MustCompile(`\[[^\]]+\]`) + // for each event/variable, check if it is in the fixedCounterEventNameTranslation map + for _, match := range expressionVars.FindAllString(expression, -1) { + for perfmonEventName, perfEventName := range fixedCounterEventNameTranslation { + if match == "["+perfmonEventName+"]" { + expression = util.ReplaceWholeWord(expression, perfmonEventName, 
perfEventName) + break + } + } } return expression, nil } From 48121ee525460a3a36c4406789bce8b5b45c42da Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Tue, 14 Oct 2025 06:09:41 -0700 Subject: [PATCH 5/5] cleanup fix counter event name replacement Signed-off-by: Harper, Jason M --- cmd/metrics/loader_perfmon.go | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/cmd/metrics/loader_perfmon.go b/cmd/metrics/loader_perfmon.go index cbf45317..6f62fcba 100644 --- a/cmd/metrics/loader_perfmon.go +++ b/cmd/metrics/loader_perfmon.go @@ -309,6 +309,9 @@ func customizeOCREventNames(metrics []MetricDefinition) []MetricDefinition { // example formula: "( 1000000000 * (a / b) / (c / (d * socket_count) ) ) * DURATIONTIMEINSECONDS" // desired output: "( 1000000000 * ([event1] / [event2]) / ([constant1] / ([constant2] * socket_count) ) ) * 1" func getExpression(perfmonMetric PerfmonMetric) (string, error) { + if perfmonMetric.Formula == "" { + return "", fmt.Errorf("metric '%s' has no formula defined", perfmonMetric.MetricName) + } expression := perfmonMetric.Formula replacers := make(map[string]string) for _, event := range perfmonMetric.Events { @@ -329,17 +332,17 @@ func getExpression(perfmonMetric PerfmonMetric) (string, error) { for commonEvent, alias := range commonEventReplacements { expression = strings.ReplaceAll(expression, commonEvent, alias) } - // replace fixed counter perfmon event names with their corresponding perf event names - // example: "100 * ([ref-cycles:k] / [TSC])" - // parse out the list of events/variables from the expression - expressionVars := regexp.MustCompile(`\[[^\]]+\]`) - // for each event/variable, check if it is in the fixedCounterEventNameTranslation map - for _, match := range expressionVars.FindAllString(expression, -1) { - for perfmonEventName, perfEventName := range fixedCounterEventNameTranslation { - if match == "["+perfmonEventName+"]" { - expression = util.ReplaceWholeWord(expression, 
perfmonEventName, perfEventName) - break - } - } + // replace fixed counter perfmon event names with their corresponding perf + // event names found in the fixedCounterEventNameTranslation map + // example: "100 * ([CPU_CLK_UNHALTED.REF_TSC:SUP] / [TSC])" + // becomes "100 * ([ref-cycles:k] / [TSC])" + expressionVarPattern := regexp.MustCompile(`\[[^\]]+\]`) + for _, match := range expressionVarPattern.FindAllString(expression, -1) { + // strip the brackets + match = strings.Trim(match, "[]") + // check if the match is in the translation map + if perfEventName, ok := fixedCounterEventNameTranslation[match]; ok { + expression = strings.ReplaceAll(expression, match, perfEventName) } } return expression, nil