Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 36 additions & 18 deletions inhibit/inhibit.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ var tracer = otel.Tracer("github.com/prometheus/alertmanager/inhibit")
type Inhibitor struct {
alerts provider.Alerts
rules []*InhibitRule
ruleIdx *ruleIndex
marker types.AlertMarker
logger *slog.Logger
propagator propagation.TextMapPropagator
Expand Down Expand Up @@ -74,6 +75,7 @@ func NewInhibitor(ap provider.Alerts, rs []amcommoncfg.InhibitRule, mk types.Ale
ruleNames[cr.Name] = struct{}{}
}
}
ih.ruleIdx = newRuleIndex(ih.rules)
return ih
}

Expand Down Expand Up @@ -189,33 +191,49 @@ func (ih *Inhibitor) Mutes(ctx context.Context, lset model.LabelSet) bool {
)
defer span.End()

now := time.Now()
for _, r := range ih.rules {
if !r.TargetMatchers.Matches(lset) {
// If target side of rule doesn't match, we don't need to look any further.
continue
}
span.AddEvent("alert matched rule target",
inhibitedByFP, inhibited := ih.checkInhibit(lset, time.Now(), span)
if inhibited {
ih.marker.SetInhibited(fp, inhibitedByFP.String())
span.AddEvent("alert inhibited",
trace.WithAttributes(
attribute.String("alerting.inhibit_rule.name", r.Name),
attribute.String("alerting.inhibit_rule.source.fingerprint", inhibitedByFP.String()),
),
)
// If we are here, the target side matches. If the source side matches, too, we
// need to exclude inhibiting alerts for which the same is true.
if inhibitedByFP, eq := r.hasEqual(lset, r.SourceMatchers.Matches(lset), now); eq {
ih.marker.SetInhibited(fp, inhibitedByFP.String())
span.AddEvent("alert inhibited",
return true
}

ih.marker.SetInhibited(fp)
span.AddEvent("alert not inhibited")
return false
}

// checkInhibit checks whether the given label set is inhibited by any rule.
// Returns the fingerprint of the inhibiting alert and true if inhibited.
// The span parameter is optional and used for per-rule tracing events.
func (ih *Inhibitor) checkInhibit(lset model.LabelSet, now time.Time, span trace.Span) (model.Fingerprint, bool) {
var inhibitedByFP model.Fingerprint

inhibited := ih.ruleIdx.forEachCandidate(lset, func(r *InhibitRule) bool {
if !r.TargetMatchers.Matches(lset) {
return false
}
if span != nil {
span.AddEvent("alert matched rule target",
trace.WithAttributes(
attribute.String("alerting.inhibit_rule.source.fingerprint", inhibitedByFP.String()),
attribute.String("alerting.inhibit_rule.name", r.Name),
),
)
}
// If we are here, the target side matches. If the source side matches, too, we
// need to exclude inhibiting alerts for which the same is true.
if foundFP, eq := r.hasEqual(lset, r.SourceMatchers.Matches(lset), now); eq {
inhibitedByFP = foundFP
return true
}
}
ih.marker.SetInhibited(fp)
span.AddEvent("alert not inhibited")
return false
})

return false
return inhibitedByFP, inhibited
}

// An InhibitRule specifies that a class of (source) alerts should inhibit
Expand Down
297 changes: 294 additions & 3 deletions inhibit/inhibit_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,7 @@ func benchmarkMutes(b *testing.B, opts benchmarkOptions) {
ih := NewInhibitor(s, rules, m, promslog.NewNopLogger())
defer ih.Stop()
go ih.Run()

// Wait some time for the inhibitor to seed its cache.
<-time.After(time.Second)
ih.WaitForLoading()

for b.Loop() {
require.NoError(b, opts.benchFunc(ih.Mutes))
Expand All @@ -228,3 +226,296 @@ func mustNewMatcher(b *testing.B, op labels.MatchType, name, value string) *labe
require.NoError(b, err)
return m
}

// BenchmarkMutesScaling runs the Mutes benchmarks at 10, 100 and 1000 inhibit
// rules for each of three rule layouts. Sub-benchmarks are named
// "<scenario>/rules=<n>", with scenarios "different_targets", "same_target"
// and "no_match" executed in that order.
func BenchmarkMutesScaling(b *testing.B) {
	ruleCounts := []int{10, 100, 1000}

	// Each scenario is a helper that builds its own rule set of the requested
	// size and then times Mutes against it.
	scenarios := []struct {
		name string
		run  func(b *testing.B, numRules int)
	}{
		{name: "different_targets", run: benchmarkDifferentTargets},
		{name: "same_target", run: benchmarkSameTarget},
		{name: "no_match", run: benchmarkNoMatch},
	}

	for _, sc := range scenarios {
		b.Run(sc.name, func(b *testing.B) {
			for _, n := range ruleCounts {
				b.Run("rules="+strconv.Itoa(n), func(b *testing.B) {
					sc.run(b, n)
				})
			}
		})
	}
}

func benchmarkDifferentTargets(b *testing.B, numRules int) {
r := prometheus.NewRegistry()
m := types.NewMarker(r)
s, err := mem.NewAlerts(context.TODO(), m, time.Minute, 0, nil, promslog.NewNopLogger(), r, nil)
require.NoError(b, err)
defer s.Close()

rules := make([]amcommoncfg.InhibitRule, numRules)
for i := range numRules {
rules[i] = amcommoncfg.InhibitRule{
SourceMatchers: amcommoncfg.Matchers{
mustNewMatcher(b, labels.MatchEqual, "alertname", "SourceAlert"),
mustNewMatcher(b, labels.MatchEqual, "cluster", strconv.Itoa(i)),
},
TargetMatchers: amcommoncfg.Matchers{
mustNewMatcher(b, labels.MatchEqual, "severity", "warning"),
mustNewMatcher(b, labels.MatchEqual, "cluster", strconv.Itoa(i)),
},
}
}

// Source alert for the LAST rule (worst case for linear scan)
lastCluster := strconv.Itoa(numRules - 1)
alert := types.Alert{
Alert: model.Alert{
Labels: model.LabelSet{
"alertname": "SourceAlert",
"cluster": model.LabelValue(lastCluster),
},
},
}
require.NoError(b, s.Put(context.Background(), &alert))

ih := NewInhibitor(s, rules, m, promslog.NewNopLogger())
defer ih.Stop()
go ih.Run()
ih.WaitForLoading()

targetLset := model.LabelSet{
"alertname": "TargetAlert",
"severity": "warning",
"cluster": model.LabelValue(lastCluster),
}
ctx := context.Background()

b.ResetTimer()
b.ReportAllocs()

for b.Loop() {
if !ih.Mutes(ctx, targetLset) {
b.Fatal("expected alert to be muted")
}
}
}

// benchmarkSameTarget times Mutes against numRules inhibit rules that all share
// the target matcher dst="0" while rule i has the source matcher src="<i>".
// A single source alert is stored for the last rule only, and the benchmarked
// label set is {dst="0"}, so every iteration is expected to report the alert
// as muted.
func benchmarkSameTarget(b *testing.B, numRules int) {
	reg := prometheus.NewRegistry()
	marker := types.NewMarker(reg)
	store, err := mem.NewAlerts(context.TODO(), marker, time.Minute, 0, nil, promslog.NewNopLogger(), reg, nil)
	require.NoError(b, err)
	defer store.Close()

	rules := make([]amcommoncfg.InhibitRule, 0, numRules)
	for i := 0; i < numRules; i++ {
		rules = append(rules, amcommoncfg.InhibitRule{
			SourceMatchers: amcommoncfg.Matchers{
				mustNewMatcher(b, labels.MatchEqual, "src", strconv.Itoa(i)),
			},
			TargetMatchers: amcommoncfg.Matchers{
				mustNewMatcher(b, labels.MatchEqual, "dst", "0"),
			},
		})
	}

	// Only the LAST rule gets a firing source alert.
	lastSrc := model.LabelValue(strconv.Itoa(numRules - 1))
	source := types.Alert{
		Alert: model.Alert{
			Labels: model.LabelSet{"src": lastSrc},
		},
	}
	err = store.Put(context.Background(), &source)
	require.NoError(b, err)

	ih := NewInhibitor(store, rules, marker, promslog.NewNopLogger())
	defer ih.Stop()
	go ih.Run()
	ih.WaitForLoading()

	ctx := context.Background()
	target := model.LabelSet{"dst": "0"}

	b.ResetTimer()
	b.ReportAllocs()

	for b.Loop() {
		muted := ih.Mutes(ctx, target)
		if !muted {
			b.Fatal("expected alert to be muted")
		}
	}
}

// benchmarkNoMatch times Mutes for a label set that matches no rule target.
// Rule i has the source matcher alertname="SourceAlert" and the target matcher
// cluster="<i>"; no alerts are stored, and the benchmarked label set carries
// cluster="nonexistent". Every iteration is expected to report the alert as
// NOT muted.
func benchmarkNoMatch(b *testing.B, numRules int) {
	reg := prometheus.NewRegistry()
	marker := types.NewMarker(reg)
	store, err := mem.NewAlerts(context.TODO(), marker, time.Minute, 0, nil, promslog.NewNopLogger(), reg, nil)
	require.NoError(b, err)
	defer store.Close()

	rules := make([]amcommoncfg.InhibitRule, 0, numRules)
	for i := 0; i < numRules; i++ {
		rules = append(rules, amcommoncfg.InhibitRule{
			SourceMatchers: amcommoncfg.Matchers{
				mustNewMatcher(b, labels.MatchEqual, "alertname", "SourceAlert"),
			},
			TargetMatchers: amcommoncfg.Matchers{
				mustNewMatcher(b, labels.MatchEqual, "cluster", strconv.Itoa(i)),
			},
		})
	}

	ih := NewInhibitor(store, rules, marker, promslog.NewNopLogger())
	defer ih.Stop()
	go ih.Run()
	ih.WaitForLoading()

	ctx := context.Background()
	// The cluster value below is not used by any rule's target matcher.
	target := model.LabelSet{
		"alertname": "TargetAlert",
		"cluster":   "nonexistent",
	}

	b.ResetTimer()
	b.ReportAllocs()

	for b.Loop() {
		muted := ih.Mutes(ctx, target)
		if muted {
			b.Fatal("expected alert to NOT be muted")
		}
	}
}

// BenchmarkMinRulesForIndexThreshold runs the linear-vs-indexed rule lookup
// comparison at 1, 2, 3, 5 and 10 rules, one "rules=<n>" sub-benchmark per
// count.
//
// Recorded results (ns/op; no figure was recorded for the 3-rule case):
//
//	rules | linear | indexed
//	    1 |     17 |      17
//	    2 |     29 |      85
//	    5 |     68 |      84
//	   10 |    135 |      94
//
// Crossover at ~7 rules. Default MinRulesForIndex=2 enables indexing early since
// high-overlap detection handles pathological cases.
func BenchmarkMinRulesForIndexThreshold(b *testing.B) {
	ruleCounts := []int{1, 2, 3, 5, 10}
	for _, n := range ruleCounts {
		name := "rules=" + strconv.Itoa(n)
		b.Run(name, func(b *testing.B) {
			benchmarkRuleIndexThreshold(b, n)
		})
	}
}

func benchmarkRuleIndexThreshold(b *testing.B, numRules int) {
rules := make([]*InhibitRule, numRules)
for i := range numRules {
rules[i] = &InhibitRule{
TargetMatchers: labels.Matchers{
mustNewMatcher(b, labels.MatchEqual, "cluster", strconv.Itoa(i)),
},
}
}

lset := model.LabelSet{"cluster": "0"}

b.Run("linear", func(b *testing.B) {
opts := ruleIndexOptions{minRulesForIndex: numRules + 1, maxMatcherOverlapRatio: 0.5}
idx := newRuleIndexWithOptions(rules, opts)

b.ResetTimer()
for b.Loop() {
idx.forEachCandidate(lset, func(r *InhibitRule) bool {
r.TargetMatchers.Matches(lset)
return false
})
}
})

b.Run("indexed", func(b *testing.B) {
opts := ruleIndexOptions{minRulesForIndex: 1, maxMatcherOverlapRatio: 0.5}
idx := newRuleIndexWithOptions(rules, opts)
Comment on lines +444 to +446
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check the ruleIndex construction logic to understand how overlap is calculated
# and whether a single rule with a unique matcher would be indexed or fall back to linear.

# Find the newRuleIndexWithOptions function and overlap calculation logic
ast-grep --pattern $'func newRuleIndexWithOptions($_, $_) $_ {
  $$$
}'

# Also search for overlap ratio calculation
rg -n -A10 'maxMatcherOverlapRatio|overlapRatio' inhibit/rule_index.go

Repository: prometheus/alertmanager

Length of output: 5626


🏁 Script executed:

#!/bin/bash
# Find the benchmark setup to see how many rules are created
# Search for the context around lines 444-446
sed -n '430,460p' inhibit/inhibit_bench_test.go

# Also search for numRules variable and setup
rg -n 'numRules' inhibit/inhibit_bench_test.go

Repository: prometheus/alertmanager

Length of output: 2446


🏁 Script executed:

#!/bin/bash
# Find the forEachCandidate implementation
ast-grep --pattern $'func ($_) forEachCandidate($_, $_) {
  $$$
}'

# Also search for it directly
rg -n 'func.*forEachCandidate' inhibit/rule_index.go -A 30

Repository: prometheus/alertmanager

Length of output: 1090


The "indexed" benchmark doesn't construct an index for numRules=1.

When numRules=1 and maxMatcherOverlapRatio=0.5, the overlap calculation results in maxOverlap=0, which causes all matchers to be marked high-overlap and excluded from indexing. This leaves exactIndex empty, causing forEachCandidate() (line 172 of rule_index.go) to fall back to linear scan: if idx.useLinearScan || len(idx.exactIndex) == 0. The benchmark thus compares linear vs. linear rather than linear vs. indexed for the numRules=1 case.

For larger rule counts (2, 3, 5, 10), the index is properly constructed and tested. Consider increasing minRulesForIndex or adjusting maxMatcherOverlapRatio to ensure the indexed variant actually uses the index even for small rule sets.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@inhibit/inhibit_bench_test.go` around lines 444 - 446, The benchmark's
"indexed" case builds a rule index with ruleIndexOptions{minRulesForIndex: 1,
maxMatcherOverlapRatio: 0.5}, which yields maxOverlap=0 for numRules=1 so all
matchers are treated as high-overlap and exactIndex remains empty; change the
options in the b.Run("indexed", ...) setup (where newRuleIndexWithOptions is
called) to ensure an index is actually built for small rule counts—e.g., set
minRulesForIndex to 2 or increase maxMatcherOverlapRatio (or both) so exactIndex
is non-empty; this will make forEachCandidate (rule_index.go) exercise the
indexed path instead of falling back to linear scan.


b.ResetTimer()
for b.Loop() {
idx.forEachCandidate(lset, func(r *InhibitRule) bool {
r.TargetMatchers.Matches(lset)
return false
})
}
})
}

// BenchmarkMaxMatcherOverlapRatio runs the overlap-ratio benchmark for
// thresholds 0.1 through 1.0 in steps of 0.1, one "ratio=<x.xx>" sub-benchmark
// per threshold.
//
// Recorded results (ns/op):
//
//	ratio | time
//	 0.10 |  183
//	 0.20 |  185
//	 0.30 |  182
//	 0.40 |  185
//	 0.50 |  186
//	 0.60 |  552
//	 0.70 |  533
//	 0.80 |  546
//	 0.90 |  524
//	 1.00 |  571
//
// Clear cliff between 0.5 and 0.6 with 3x degradation. Default MaxMatcherOverlapRatio=0.5
// is optimal - highest value before performance degrades.
func BenchmarkMaxMatcherOverlapRatio(b *testing.B) {
	thresholds := []float64{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}
	for _, threshold := range thresholds {
		// Two decimals keep the sub-benchmark names aligned (ratio=0.10 ... ratio=1.00).
		name := "ratio=" + strconv.FormatFloat(threshold, 'f', 2, 64)
		b.Run(name, func(b *testing.B) {
			benchmarkOverlapRatio(b, threshold)
		})
	}
}

// benchmarkOverlapRatio times forEachCandidate on a rule index built with the
// given maxMatcherOverlapRatio (and minRulesForIndex fixed at 2).
//
// The rule set has 100 rules: the first 60 all share the single target matcher
// severity="warning", and each remaining rule i has the single target matcher
// cluster="<i>". The probed label set carries severity="warning" together with
// the cluster value of the first per-cluster rule. The callback only counts
// visited rules and never stops the iteration early.
func benchmarkOverlapRatio(b *testing.B, ratio float64) {
	// Fraction of rules that share the identical severity matcher.
	const sharedFraction = 0.6

	numRules := 100
	sharedCount := int(float64(numRules) * sharedFraction)

	rules := make([]*InhibitRule, numRules)
	for i := range rules {
		var target labels.Matchers
		if i < sharedCount {
			target = labels.Matchers{
				mustNewMatcher(b, labels.MatchEqual, "severity", "warning"),
			}
		} else {
			target = labels.Matchers{
				mustNewMatcher(b, labels.MatchEqual, "cluster", strconv.Itoa(i)),
			}
		}
		rules[i] = &InhibitRule{TargetMatchers: target}
	}

	idx := newRuleIndexWithOptions(rules, ruleIndexOptions{
		minRulesForIndex:       2,
		maxMatcherOverlapRatio: ratio,
	})

	probe := model.LabelSet{
		"severity": "warning",
		"cluster":  model.LabelValue(strconv.Itoa(sharedCount)),
	}

	b.ResetTimer()
	b.ReportAllocs()

	var visited int
	for b.Loop() {
		visited = 0
		idx.forEachCandidate(probe, func(_ *InhibitRule) bool {
			visited++
			return false
		})
	}
	// Keep the counter observably used after the loop.
	_ = visited
}
Loading
Loading