From e60f4f0e52b4321211c4aaa0976bb5cd42cb0e83 Mon Sep 17 00:00:00 2001 From: Siavash Safi Date: Wed, 8 Oct 2025 13:27:35 +0200 Subject: [PATCH] fix: improve inhibition performance This change adds a new index per inhibition rule which: 1. extracts the subset of source alert labelset which are in equals 2. calculates the fingerprint of the above 3. maps the calculated fingerprint to the source alert fingerprint 4. performs the same calculation for target alerts 5. uses the above index to find the equal source alerts quickly This significantly improves the inhibition performance, since there is no need to loop over all source alerts and the equal labels. The equals index items are garbage collected by callback from `scache`. Signed-off-by: Siavash Safi --- inhibit/index.go | 57 ++++++++++++++++++ inhibit/inhibit.go | 107 +++++++++++++++++++++++++++++----- inhibit/inhibit_bench_test.go | 2 +- inhibit/inhibit_test.go | 53 +++++++++++------ 4 files changed, 183 insertions(+), 36 deletions(-) create mode 100644 inhibit/index.go diff --git a/inhibit/index.go b/inhibit/index.go new file mode 100644 index 0000000000..e931595980 --- /dev/null +++ b/inhibit/index.go @@ -0,0 +1,57 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package inhibit + +import ( + "sync" + + "github.com/prometheus/common/model" +) + +// index contains map of fingerprints to fingerprints. +// The keys are fingerprints of the equal labels of source alerts. 
+// The values are fingerprints of the source alerts. +// For more info see comments on inhibitor and InhibitRule. +type index struct { + mtx sync.RWMutex + items map[model.Fingerprint]model.Fingerprint +} + +func newIndex() *index { + return &index{ + items: make(map[model.Fingerprint]model.Fingerprint), + } +} + +func (c *index) Get(key model.Fingerprint) (model.Fingerprint, bool) { + c.mtx.RLock() + defer c.mtx.RUnlock() + + fp, ok := c.items[key] + return fp, ok +} + +func (c *index) Set(key, value model.Fingerprint) { + c.mtx.Lock() + defer c.mtx.Unlock() + + c.items[key] = value +} + +func (c *index) Delete(key model.Fingerprint) { + c.mtx.Lock() + defer c.mtx.Unlock() + + delete(c.items, key) +} diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index 3650186e37..a34df8d0e5 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -1,4 +1,4 @@ -// Copyright 2015 Prometheus Team +// Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -74,7 +74,10 @@ func (ih *Inhibitor) run(ctx context.Context) { if r.SourceMatchers.Matches(a.Labels) { if err := r.scache.Set(a); err != nil { ih.logger.Error("error on set alert", "err", err) + continue } + + r.updateIndex(a) } } } @@ -162,6 +165,12 @@ type InhibitRule struct { // Cache of alerts matching source labels. scache *store.Alerts + + // Index of fingerprints of source alert equal labels to fingerprint of source alert. + // The index helps speed up source alert lookups from scache significantly in scenarios with 100s of source alerts cached. + // The index items might overwrite each other if multiple source alerts have exact equal labels. + // Overwrites only happen if the new source alert has bigger EndsAt value. + sindex *index } // NewInhibitRule returns a new InhibitRule based on a configuration definition. 
@@ -217,11 +226,85 @@ func NewInhibitRule(cr config.InhibitRule) *InhibitRule { equal[model.LabelName(ln)] = struct{}{} } - return &InhibitRule{ + rule := &InhibitRule{ SourceMatchers: sourcem, TargetMatchers: targetm, Equal: equal, scache: store.NewAlerts(), + sindex: newIndex(), + } + rule.scache.SetGCCallback(rule.gcCallback) + + return rule +} + +// fingerprintEquals returns the fingerprint of the equal labels of the given label set. +func (r *InhibitRule) fingerprintEquals(lset model.LabelSet) model.Fingerprint { + equalSet := model.LabelSet{} + for n := range r.Equal { + equalSet[n] = lset[n] + } + return equalSet.Fingerprint() +} + +// updateIndex updates the source alert index if necessary. +func (r *InhibitRule) updateIndex(alert *types.Alert) { + fp := alert.Fingerprint() + // Calculate source labelset subset which is in equals. + eq := r.fingerprintEquals(alert.Labels) + + // Check if the equal labelset is already in the index. + indexed, ok := r.sindex.Get(eq) + if !ok { + // If not, add it. + r.sindex.Set(eq, fp) + return + } + // If the indexed fingerprint is the same as the new fingerprint, do nothing. + if indexed == fp { + return + } + + // New alert and existing index are not the same, compare them. + existing, err := r.scache.Get(indexed) + if err != nil { + // failed to get the existing alert, overwrite the index. + r.sindex.Set(eq, fp) + return + } + + // If the new alert resolves after the existing alert, replace the index. + if existing.ResolvedAt(alert.EndsAt) { + r.sindex.Set(eq, fp) + return + } + // If the existing alert resolves after the new alert, do nothing. +} + +// findEqualSourceAlert returns the source alert that matches the equal labels of the given label set. 
+func (r *InhibitRule) findEqualSourceAlert(lset model.LabelSet, now time.Time) (*types.Alert, bool) { + equalsFP := r.fingerprintEquals(lset) + sourceFP, ok := r.sindex.Get(equalsFP) + if ok { + alert, err := r.scache.Get(sourceFP) + if err != nil { + return nil, false + } + + if alert.ResolvedAt(now) { + return nil, false + } + + return alert, true + } + + return nil, false +} + +func (r *InhibitRule) gcCallback(alerts []types.Alert) { + for _, a := range alerts { + fp := r.fingerprintEquals(a.Labels) + r.sindex.Delete(fp) } } @@ -231,21 +314,13 @@ func NewInhibitRule(cr config.InhibitRule) *InhibitRule { // source and the target side of the rule are disregarded. func (r *InhibitRule) hasEqual(lset model.LabelSet, excludeTwoSidedMatch bool) (model.Fingerprint, bool) { now := time.Now() -Outer: - for _, a := range r.scache.List() { - // The cache might be stale and contain resolved alerts. - if a.ResolvedAt(now) { - continue + equal, found := r.findEqualSourceAlert(lset, now) + if found { + if excludeTwoSidedMatch && r.TargetMatchers.Matches(equal.Labels) { + return model.Fingerprint(0), false } - for n := range r.Equal { - if a.Labels[n] != lset[n] { - continue Outer - } - } - if excludeTwoSidedMatch && r.TargetMatchers.Matches(a.Labels) { - continue Outer - } - return a.Fingerprint(), true + return equal.Fingerprint(), found } + return model.Fingerprint(0), false } diff --git a/inhibit/inhibit_bench_test.go b/inhibit/inhibit_bench_test.go index 94023113a6..18998e0ecc 100644 --- a/inhibit/inhibit_bench_test.go +++ b/inhibit/inhibit_bench_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 The Prometheus Authors +// Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at diff --git a/inhibit/inhibit_test.go b/inhibit/inhibit_test.go index 59ff52b195..3b640ba6fc 100644 --- a/inhibit/inhibit_test.go +++ b/inhibit/inhibit_test.go @@ -1,4 +1,4 @@ -// Copyright 2016 Prometheus Team +// Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -35,25 +35,26 @@ func TestInhibitRuleHasEqual(t *testing.T) { now := time.Now() cases := []struct { + name string initial map[model.Fingerprint]*types.Alert equal model.LabelNames input model.LabelSet result bool }{ { - // No source alerts at all. + name: "no source alerts", initial: map[model.Fingerprint]*types.Alert{}, input: model.LabelSet{"a": "b"}, result: false, }, { - // No equal labels, any source alerts satisfies the requirement. + name: "no equal labels, any source alerts satisfies the requirement", initial: map[model.Fingerprint]*types.Alert{1: {}}, input: model.LabelSet{"a": "b"}, result: true, }, { - // Matching but already resolved. + name: "matching but already resolved", initial: map[model.Fingerprint]*types.Alert{ 1: { Alert: model.Alert{ @@ -75,7 +76,7 @@ func TestInhibitRuleHasEqual(t *testing.T) { result: false, }, { - // Matching and unresolved. + name: "matching and unresolved", initial: map[model.Fingerprint]*types.Alert{ 1: { Alert: model.Alert{ @@ -97,7 +98,7 @@ func TestInhibitRuleHasEqual(t *testing.T) { result: true, }, { - // Equal label does not match. 
+ name: "equal label does not match", initial: map[model.Fingerprint]*types.Alert{ 1: { Alert: model.Alert{ @@ -121,20 +122,24 @@ func TestInhibitRuleHasEqual(t *testing.T) { } for _, c := range cases { - r := &InhibitRule{ - Equal: map[model.LabelName]struct{}{}, - scache: store.NewAlerts(), - } - for _, ln := range c.equal { - r.Equal[ln] = struct{}{} - } - for _, v := range c.initial { - r.scache.Set(v) - } + t.Run(c.name, func(t *testing.T) { + r := &InhibitRule{ + Equal: map[model.LabelName]struct{}{}, + scache: store.NewAlerts(), + sindex: newIndex(), + } + for _, ln := range c.equal { + r.Equal[ln] = struct{}{} + } + for _, v := range c.initial { + r.scache.Set(v) + r.updateIndex(v) + } - if _, have := r.hasEqual(c.input, false); have != c.result { - t.Errorf("Unexpected result %t, expected %t", have, c.result) - } + if _, have := r.hasEqual(c.input, false); have != c.result { + t.Errorf("Unexpected result %t, expected %t", have, c.result) + } + }) } } @@ -174,8 +179,13 @@ func TestInhibitRuleMatches(t *testing.T) { ih.rules[0].scache = store.NewAlerts() ih.rules[0].scache.Set(sourceAlert1) + ih.rules[0].sindex = newIndex() + ih.rules[0].updateIndex(sourceAlert1) + ih.rules[1].scache = store.NewAlerts() ih.rules[1].scache.Set(sourceAlert2) + ih.rules[1].sindex = newIndex() + ih.rules[1].updateIndex(sourceAlert2) cases := []struct { target model.LabelSet @@ -270,8 +280,13 @@ func TestInhibitRuleMatchers(t *testing.T) { ih.rules[0].scache = store.NewAlerts() ih.rules[0].scache.Set(sourceAlert1) + ih.rules[0].sindex = newIndex() + ih.rules[0].updateIndex(sourceAlert1) + ih.rules[1].scache = store.NewAlerts() ih.rules[1].scache.Set(sourceAlert2) + ih.rules[1].sindex = newIndex() + ih.rules[1].updateIndex(sourceAlert2) cases := []struct { target model.LabelSet