Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ route:
# The labels by which incoming alerts are grouped together. For example,
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
# be batched into a single group.
#
# To aggregate by all possible labels, use '...' as the sole label name.
# This effectively disables aggregation entirely, passing through all
# alerts as-is. This is unlikely to be what you want, unless you have
# a very low alert volume or your upstream notification system performs
# its own grouping. Example: group_by: [...]
group_by: ['alertname', 'cluster']

# When a new group of alerts is created by an incoming alert, wait at
Expand Down
22 changes: 20 additions & 2 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -494,8 +494,11 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {

// A Route is a node that contains definitions of how to handle alerts.
type Route struct {
Receiver string `yaml:"receiver,omitempty" json:"receiver,omitempty"`
GroupBy []model.LabelName `yaml:"group_by,omitempty" json:"group_by,omitempty"`
Receiver string `yaml:"receiver,omitempty" json:"receiver,omitempty"`

GroupByStr []string `yaml:"group_by,omitempty" json:"group_by,omitempty"`
GroupBy []model.LabelName
GroupByAll bool

Match map[string]string `yaml:"match,omitempty" json:"match,omitempty"`
MatchRE map[string]Regexp `yaml:"match_re,omitempty" json:"match_re,omitempty"`
Expand Down Expand Up @@ -525,6 +528,21 @@ func (r *Route) UnmarshalYAML(unmarshal func(interface{}) error) error {
return fmt.Errorf("invalid label name %q", k)
}
}
for _, l := range r.GroupByStr {
if l == "..." {
r.GroupByAll = true
} else {
labelName := model.LabelName(l)
if !labelName.IsValid() {
return fmt.Errorf("invalid label name %q in group_by list", l)
}
r.GroupBy = append(r.GroupBy, labelName)
}
}

if len(r.GroupBy) > 0 && r.GroupByAll {
return fmt.Errorf("cannot have wildcard group_by (`...`) and other other labels at the same time")
}

groupBy := map[model.LabelName]struct{}{}

Expand Down
58 changes: 58 additions & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,47 @@ receivers:

}

// TestWildcardGroupByWithOtherGroupByLabels checks that Load rejects a route
// whose group_by list mixes the '...' wildcard with explicit label names.
func TestWildcardGroupByWithOtherGroupByLabels(t *testing.T) {
	// The raw string is whitespace-sensitive YAML: group_by and receiver must
	// be nested under route, otherwise they are parsed as top-level keys and
	// the wildcard check is never exercised.
	in := `
route:
  group_by: ['alertname', 'cluster', '...']
  receiver: team-X-mails
receivers:
- name: 'team-X-mails'
`
	_, err := Load(in)

	// Compared verbatim, so this must stay identical to the error text the
	// config package produces for this condition.
	expected := "cannot have wildcard group_by (`...`) and other other labels at the same time"

	if err == nil {
		t.Fatalf("no error returned, expected:\n%q", expected)
	}
	if err.Error() != expected {
		t.Errorf("\nexpected:\n%q\ngot:\n%q", expected, err.Error())
	}
}

// TestGroupByInvalidLabel checks that Load rejects a group_by entry that is
// not a valid label name and reports the offending name in the error.
func TestGroupByInvalidLabel(t *testing.T) {
	// The raw string is whitespace-sensitive YAML: group_by and receiver must
	// be nested under route for the label validation to see them.
	in := `
route:
  group_by: ['-invalid-']
  receiver: team-X-mails
receivers:
- name: 'team-X-mails'
`
	_, err := Load(in)

	// Compared verbatim against the returned error.
	expected := "invalid label name \"-invalid-\" in group_by list"

	if err == nil {
		t.Fatalf("no error returned, expected:\n%q", expected)
	}
	if err.Error() != expected {
		t.Errorf("\nexpected:\n%q\ngot:\n%q", expected, err.Error())
	}
}

func TestRootRouteExists(t *testing.T) {
in := `
receivers:
Expand Down Expand Up @@ -448,6 +489,12 @@ func TestEmptyFieldsAndRegex(t *testing.T) {
"cluster",
"service",
},
GroupByStr: []string{
"alertname",
"cluster",
"service",
},
GroupByAll: false,
Routes: []*Route{
{
Receiver: "team-X-mails",
Expand Down Expand Up @@ -506,6 +553,17 @@ func TestSMTPHello(t *testing.T) {
}
}

// TestGroupByAll checks that a configuration file whose root route uses
// group_by: [...] is loaded with Route.GroupByAll set.
func TestGroupByAll(t *testing.T) {
	const path = "testdata/conf.group-by-all.yml"

	c, _, err := LoadFile(path)
	if err != nil {
		// Stop here: the returned config is not meaningful after a failed
		// load, and dereferencing it below could panic.
		t.Fatalf("Error parsing %s: %s", path, err)
	}

	if !c.Route.GroupByAll {
		t.Errorf("Invalid group by all param: expected to be true")
	}
}

func TestVictorOpsDefaultAPIKey(t *testing.T) {
conf, _, err := LoadFile("testdata/conf.victorops-default-apikey.yml")
if err != nil {
Expand Down
10 changes: 10 additions & 0 deletions config/testdata/conf.group-by-all.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
route:
group_by: [...]
group_wait: 30s
group_interval: 5m
repeat_interval: 3h
receiver: team-X

receivers:
- name: 'team-X'

19 changes: 12 additions & 7 deletions dispatch/dispatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,7 @@ type notifyFunc func(context.Context, ...*types.Alert) bool
// processAlert determines in which aggregation group the alert falls
// and inserts it.
func (d *Dispatcher) processAlert(alert *types.Alert, route *Route) {
groupLabels := model.LabelSet{}

for ln, lv := range alert.Labels {
if _, ok := route.RouteOpts.GroupBy[ln]; ok {
groupLabels[ln] = lv
}
}
groupLabels := getGroupLabels(alert, route)

fp := groupLabels.Fingerprint()

Expand Down Expand Up @@ -189,6 +183,17 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *Route) {
ag.insert(alert)
}

// getGroupLabels returns a new label set holding the labels of alert that the
// route groups by: every label when RouteOpts.GroupByAll is set, otherwise
// only those whose name is a key of RouteOpts.GroupBy.
func getGroupLabels(alert *types.Alert, route *Route) model.LabelSet {
	selected := make(model.LabelSet)
	for name, value := range alert.Labels {
		if !route.RouteOpts.GroupByAll {
			// Without the wildcard, keep only explicitly listed labels.
			if _, listed := route.RouteOpts.GroupBy[name]; !listed {
				continue
			}
		}
		selected[name] = value
	}
	return selected
}

// aggrGroup aggregates alert fingerprints into groups to which a
// common set of routing options applies.
// It emits notifications in the specified intervals.
Expand Down
64 changes: 64 additions & 0 deletions dispatch/dispatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,67 @@ func TestAggrGroup(t *testing.T) {

ag.stop()
}

// TestGroupLabels checks that getGroupLabels keeps only the labels named in
// RouteOpts.GroupBy when GroupByAll is disabled.
func TestGroupLabels(t *testing.T) {
	alert := &types.Alert{
		Alert: model.Alert{
			Labels: model.LabelSet{"a": "v1", "b": "v2", "c": "v3"},
		},
	}

	// Group by "a" and "b" only; "c" must be dropped from the result.
	opts := RouteOpts{
		GroupBy:    map[model.LabelName]struct{}{"a": {}, "b": {}},
		GroupByAll: false,
	}
	route := &Route{RouteOpts: opts}

	want := model.LabelSet{"a": "v1", "b": "v2"}

	if got := getGroupLabels(alert, route); !reflect.DeepEqual(got, want) {
		t.Fatalf("expected labels are %v, but got %v", want, got)
	}
}

// TestGroupByAllLabels checks that getGroupLabels returns every alert label
// when GroupByAll is enabled, even though GroupBy itself is empty.
func TestGroupByAllLabels(t *testing.T) {
	alert := &types.Alert{
		Alert: model.Alert{
			Labels: model.LabelSet{"a": "v1", "b": "v2", "c": "v3"},
		},
	}

	opts := RouteOpts{
		GroupBy:    map[model.LabelName]struct{}{},
		GroupByAll: true,
	}
	route := &Route{RouteOpts: opts}

	// All three labels are expected back.
	want := model.LabelSet{"a": "v1", "b": "v2", "c": "v3"}

	if got := getGroupLabels(alert, route); !reflect.DeepEqual(got, want) {
		t.Fatalf("expected labels are %v, but got %v", want, got)
	}
}
12 changes: 11 additions & 1 deletion dispatch/route.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ var DefaultRouteOpts = RouteOpts{
GroupInterval: 5 * time.Minute,
RepeatInterval: 4 * time.Hour,
GroupBy: map[model.LabelName]struct{}{},
GroupByAll: false,
}

// A Route is a node that contains definitions of how to handle alerts.
Expand Down Expand Up @@ -69,6 +70,9 @@ func NewRoute(cr *config.Route, parent *Route) *Route {
opts.GroupBy[ln] = struct{}{}
}
}

opts.GroupByAll = cr.GroupByAll

if cr.GroupWait != nil {
opts.GroupWait = time.Duration(*cr.GroupWait)
}
Expand Down Expand Up @@ -158,6 +162,9 @@ type RouteOpts struct {
// What labels to group alerts by for notifications.
GroupBy map[model.LabelName]struct{}

// Use all alert labels to group.
GroupByAll bool

// How long to wait to group matching alerts before sending
// a notification.
GroupWait time.Duration
Expand All @@ -170,19 +177,22 @@ func (ro *RouteOpts) String() string {
for ln := range ro.GroupBy {
labels = append(labels, ln)
}
return fmt.Sprintf("<RouteOpts send_to:%q group_by:%q timers:%q|%q>", ro.Receiver, labels, ro.GroupWait, ro.GroupInterval)
return fmt.Sprintf("<RouteOpts send_to:%q group_by:%q group_by_all:%t timers:%q|%q>",
ro.Receiver, labels, ro.GroupByAll, ro.GroupWait, ro.GroupInterval)
}

// MarshalJSON returns a JSON representation of the routing options.
func (ro *RouteOpts) MarshalJSON() ([]byte, error) {
v := struct {
Receiver string `json:"receiver"`
GroupBy model.LabelNames `json:"groupBy"`
GroupByAll bool `json:"groupByAll"`
GroupWait time.Duration `json:"groupWait"`
GroupInterval time.Duration `json:"groupInterval"`
RepeatInterval time.Duration `json:"repeatInterval"`
}{
Receiver: ro.Receiver,
GroupByAll: ro.GroupByAll,
GroupWait: ro.GroupWait,
GroupInterval: ro.GroupInterval,
RepeatInterval: ro.RepeatInterval,
Expand Down
11 changes: 10 additions & 1 deletion dispatch/route_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ routes:
env: 'testing'

receiver: 'notify-testing'
group_by: []
group_by: [...]

- match:
env: "production"
Expand Down Expand Up @@ -110,6 +110,7 @@ routes:
{
Receiver: "notify-A",
GroupBy: def.GroupBy,
GroupByAll: false,
GroupWait: def.GroupWait,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
Expand All @@ -126,6 +127,7 @@ routes:
{
Receiver: "notify-A",
GroupBy: def.GroupBy,
GroupByAll: false,
GroupWait: def.GroupWait,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
Expand All @@ -141,6 +143,7 @@ routes:
{
Receiver: "notify-BC",
GroupBy: lset("foo", "bar"),
GroupByAll: false,
GroupWait: 2 * time.Minute,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
Expand All @@ -157,6 +160,7 @@ routes:
{
Receiver: "notify-testing",
GroupBy: lset(),
GroupByAll: true,
GroupWait: def.GroupWait,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
Expand All @@ -173,13 +177,15 @@ routes:
{
Receiver: "notify-productionA",
GroupBy: def.GroupBy,
GroupByAll: false,
GroupWait: 1 * time.Minute,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
},
{
Receiver: "notify-productionB",
GroupBy: lset("job"),
GroupByAll: false,
GroupWait: 30 * time.Second,
GroupInterval: 5 * time.Minute,
RepeatInterval: 1 * time.Hour,
Expand All @@ -198,6 +204,7 @@ routes:
{
Receiver: "notify-def",
GroupBy: lset("role"),
GroupByAll: false,
GroupWait: def.GroupWait,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
Expand All @@ -214,6 +221,7 @@ routes:
{
Receiver: "notify-testing",
GroupBy: lset("role"),
GroupByAll: false,
GroupWait: def.GroupWait,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
Expand All @@ -231,6 +239,7 @@ routes:
{
Receiver: "notify-testing",
GroupBy: lset("role"),
GroupByAll: false,
GroupWait: 2 * time.Minute,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
Expand Down
6 changes: 6 additions & 0 deletions doc/examples/simple.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ route:
# The labels by which incoming alerts are grouped together. For example,
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
# be batched into a single group.
#
# To aggregate by all possible labels, use '...' as the sole label name.
# This effectively disables aggregation entirely, passing through all
# alerts as-is. This is unlikely to be what you want, unless you have
# a very low alert volume or your upstream notification system performs
# its own grouping. Example: group_by: [...]
group_by: ['alertname', 'cluster', 'service']

# When a new group of alerts is created by an incoming alert, wait at
Expand Down