From 62b8f2506c0ec46dc2b501b898de7f4198c66c82 Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Wed, 17 Oct 2018 11:53:13 +0200 Subject: [PATCH 01/14] add group_by_all support Signed-off-by: Kyryl Sablin --- README.md | 5 ++- config/config.go | 5 +++ config/config_test.go | 33 ++++++++++++++++++ config/testdata/conf.good.yml | 3 ++ dispatch/dispatch.go | 19 +++++++---- dispatch/dispatch_test.go | 64 +++++++++++++++++++++++++++++++++++ dispatch/route.go | 12 ++++++- dispatch/route_test.go | 10 ++++++ doc/examples/simple.yml | 3 ++ 9 files changed, 145 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 677f5bdf93..3ba446f8c3 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,10 @@ route: # multiple alerts coming in for cluster=A and alertname=LatencyHigh would # be batched into a single group. group_by: ['alertname', 'cluster'] - + + # if true all labels of alert will be used for grouping. If true then group_by should be empty. + group_by_all: false + # When a new group of alerts is created by an incoming alert, wait at # least 'group_wait' to send the initial notification. # This way ensures that you get multiple alerts for the same group that start diff --git a/config/config.go b/config/config.go index e4322e47bd..98d3a61688 100644 --- a/config/config.go +++ b/config/config.go @@ -496,6 +496,7 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { type Route struct { Receiver string `yaml:"receiver,omitempty" json:"receiver,omitempty"` GroupBy []model.LabelName `yaml:"group_by,omitempty" json:"group_by,omitempty"` + GroupByAll bool `yaml:"group_by_all,omitempty" json:"group_by_all,omitempty"` Match map[string]string `yaml:"match,omitempty" json:"match,omitempty"` MatchRE map[string]Regexp `yaml:"match_re,omitempty" json:"match_re,omitempty"` @@ -526,6 +527,10 @@ func (r *Route) UnmarshalYAML(unmarshal func(interface{}) error) error { } } + if len(r.GroupBy) > 0 && r.GroupByAll { + return fmt.Errorf("cannot have group_by_all is true and non-empty group_by") + } + groupBy := map[model.LabelName]struct{}{} for _, ln := range r.GroupBy { diff --git a/config/config_test.go b/config/config_test.go index a1b94e212c..776f3e121b 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -144,6 +144,27 @@ receivers: } +func TestNonEmptyGroupByAndGroupByAl(t *testing.T) { + in := ` +route: + group_by: ['alertname', 'cluster'] + group_by_all: true + +receivers: +- name: 'team-X-mails' +` + _, err := Load(in) + + expected := "cannot have group_by_all is true and non-empty group_by" + + if err == nil { + t.Fatalf("no error returned, expected:\n%q", expected) + } + if err.Error() != expected { + t.Errorf("\nexpected:\n%q\ngot:\n%q", expected, err.Error()) + } +} + func TestRootRouteExists(t *testing.T) { in := ` receivers: @@ -448,6 +469,7 @@ func TestEmptyFieldsAndRegex(t *testing.T) { "cluster", "service", }, + GroupByAll: false, Routes: []*Route{ { Receiver: "team-X-mails", @@ -506,6 +528,17 @@ func TestSMTPHello(t *testing.T) { } } +func TestGroupByAll(t *testing.T) { + c, _, err := LoadFile("testdata/conf.group-by-all.yml") + if err != nil { + t.Errorf("Error parsing %s: %s", "testdata/conf.group-by-all.yml", err) + } + + if !c.Route.GroupByAll { + t.Errorf("Invalid group by all param: expected to by true") + } +} + func TestVictorOpsDefaultAPIKey(t *testing.T) { conf, _, err := LoadFile("testdata/conf.victorops-default-apikey.yml") if err != nil { diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index 1fb59bb4c9..32134b2751 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -24,6 +24,9 @@ route: # be batched into a single group. group_by: ['alertname', 'cluster', 'service'] + # If true, each unique set of labels will constitute new group. + group_by_all: false + # When a new group of alerts is created by an incoming alert, wait at # least 'group_wait' to send the initial notification. # This way ensures that you get multiple alerts for the same group that start diff --git a/dispatch/dispatch.go b/dispatch/dispatch.go index a8397abe21..ffd45ec536 100644 --- a/dispatch/dispatch.go +++ b/dispatch/dispatch.go @@ -152,13 +152,7 @@ type notifyFunc func(context.Context, ...*types.Alert) bool // processAlert determines in which aggregation group the alert falls // and inserts it. func (d *Dispatcher) processAlert(alert *types.Alert, route *Route) { - groupLabels := model.LabelSet{} - - for ln, lv := range alert.Labels { - if _, ok := route.RouteOpts.GroupBy[ln]; ok { - groupLabels[ln] = lv - } - } + groupLabels := getGroupLabels(alert, route) fp := groupLabels.Fingerprint() @@ -189,6 +183,17 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *Route) { ag.insert(alert) } +func getGroupLabels(alert *types.Alert, route *Route) model.LabelSet { + groupLabels := model.LabelSet{} + for ln, lv := range alert.Labels { + if _, ok := route.RouteOpts.GroupBy[ln]; ok || route.RouteOpts.GroupByAll { + groupLabels[ln] = lv + } + } + + return groupLabels +} + // aggrGroup aggregates alert fingerprints into groups to which a // common set of routing options applies. // It emits notifications in the specified intervals. diff --git a/dispatch/dispatch_test.go b/dispatch/dispatch_test.go index 13099bc523..4cc91653c2 100644 --- a/dispatch/dispatch_test.go +++ b/dispatch/dispatch_test.go @@ -240,3 +240,67 @@ func TestAggrGroup(t *testing.T) { ag.stop() } + +func TestGroupLabels(t *testing.T) { + var a = &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{ + "a": "v1", + "b": "v2", + "c": "v3", + }, + }, + } + + route := &Route{ + RouteOpts: RouteOpts{ + GroupBy: map[model.LabelName]struct{}{ + "a": struct{}{}, + "b": struct{}{}, + }, + GroupByAll: false, + }, + } + + expLs := model.LabelSet{ + "a": "v1", + "b": "v2", + } + + ls := getGroupLabels(a, route) + + if !reflect.DeepEqual(ls, expLs) { + t.Fatalf("expected labels are %v, but got %v", expLs, ls) + } +} + +func TestGroupByAllLabels(t *testing.T) { + var a = &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{ + "a": "v1", + "b": "v2", + "c": "v3", + }, + }, + } + + route := &Route{ + RouteOpts: RouteOpts{ + GroupBy: map[model.LabelName]struct{}{}, + GroupByAll: true, + }, + } + + expLs := model.LabelSet{ + "a": "v1", + "b": "v2", + "c": "v3", + } + + ls := getGroupLabels(a, route) + + if !reflect.DeepEqual(ls, expLs) { + t.Fatalf("expected labels are %v, but got %v", expLs, ls) + } +} diff --git a/dispatch/route.go b/dispatch/route.go index add2afb64b..4c050ceeaa 100644 --- a/dispatch/route.go +++ b/dispatch/route.go @@ -32,6 +32,7 @@ var DefaultRouteOpts = RouteOpts{ GroupInterval: 5 * time.Minute, RepeatInterval: 4 * time.Hour, GroupBy: map[model.LabelName]struct{}{}, + GroupByAll: false, } // A Route is a node that contains definitions of how to handle alerts. @@ -69,6 +70,9 @@ func NewRoute(cr *config.Route, parent *Route) *Route { opts.GroupBy[ln] = struct{}{} } } + + opts.GroupByAll = cr.GroupByAll + if cr.GroupWait != nil { opts.GroupWait = time.Duration(*cr.GroupWait) } @@ -158,6 +162,9 @@ type RouteOpts struct { // What labels to group alerts by for notifications. GroupBy map[model.LabelName]struct{} + // Use all alert's labels to group + GroupByAll bool + // How long to wait to group matching alerts before sending // a notification. GroupWait time.Duration @@ -170,7 +177,8 @@ func (ro *RouteOpts) String() string { for ln := range ro.GroupBy { labels = append(labels, ln) } - return fmt.Sprintf("", ro.Receiver, labels, ro.GroupWait, ro.GroupInterval) + return fmt.Sprintf("", + ro.Receiver, labels, ro.GroupByAll, ro.GroupWait, ro.GroupInterval) } // MarshalJSON returns a JSON representation of the routing options. @@ -178,11 +186,13 @@ func (ro *RouteOpts) MarshalJSON() ([]byte, error) { v := struct { Receiver string `json:"receiver"` GroupBy model.LabelNames `json:"groupBy"` + GroupByAll bool `json:"groupByAll"` GroupWait time.Duration `json:"groupWait"` GroupInterval time.Duration `json:"groupInterval"` RepeatInterval time.Duration `json:"repeatInterval"` }{ Receiver: ro.Receiver, + GroupByAll: ro.GroupByAll, GroupWait: ro.GroupWait, GroupInterval: ro.GroupInterval, RepeatInterval: ro.RepeatInterval, diff --git a/dispatch/route_test.go b/dispatch/route_test.go index 396c41af2a..da5b115d7c 100644 --- a/dispatch/route_test.go +++ b/dispatch/route_test.go @@ -40,6 +40,7 @@ routes: receiver: 'notify-testing' group_by: [] + group_by_all: true - match: env: "production" @@ -110,6 +111,7 @@ routes: { Receiver: "notify-A", GroupBy: def.GroupBy, + GroupByAll: false, GroupWait: def.GroupWait, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -126,6 +128,7 @@ routes: { Receiver: "notify-A", GroupBy: def.GroupBy, + GroupByAll: false, GroupWait: def.GroupWait, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -141,6 +144,7 @@ routes: { Receiver: "notify-BC", GroupBy: lset("foo", "bar"), + GroupByAll: false, GroupWait: 2 * time.Minute, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -157,6 +161,7 @@ routes: { Receiver: "notify-testing", GroupBy: lset(), + GroupByAll: true, GroupWait: def.GroupWait, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -173,6 +178,7 @@ routes: { Receiver: "notify-productionA", GroupBy: def.GroupBy, + GroupByAll: false, GroupWait: 1 * time.Minute, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -180,6 +186,7 @@ routes: { Receiver: "notify-productionB", GroupBy: lset("job"), + GroupByAll: false, GroupWait: 30 * time.Second, GroupInterval: 5 * time.Minute, RepeatInterval: 1 * time.Hour, @@ -198,6 +205,7 @@ routes: { Receiver: "notify-def", GroupBy: lset("role"), + GroupByAll: false, GroupWait: def.GroupWait, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -214,6 +222,7 @@ routes: { Receiver: "notify-testing", GroupBy: lset("role"), + GroupByAll: false, GroupWait: def.GroupWait, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -231,6 +240,7 @@ routes: { Receiver: "notify-testing", GroupBy: lset("role"), + GroupByAll: false, GroupWait: 2 * time.Minute, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, diff --git a/doc/examples/simple.yml b/doc/examples/simple.yml index 3a2165cadb..efc27c9a1d 100644 --- a/doc/examples/simple.yml +++ b/doc/examples/simple.yml @@ -20,6 +20,9 @@ route: # be batched into a single group. group_by: ['alertname', 'cluster', 'service'] + # if true all labels of alert will be used for grouping. If true then group_by should be empty. + group_by_all: false + # When a new group of alerts is created by an incoming alert, wait at # least 'group_wait' to send the initial notification. # This way ensures that you get multiple alerts for the same group that start From 270a54ccd65666e24024be66861075080d69f23f Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Wed, 17 Oct 2018 14:04:49 +0200 Subject: [PATCH 02/14] fix styles Signed-off-by: Kyryl Sablin --- config/config.go | 6 +++--- dispatch/dispatch_test.go | 8 ++++---- dispatch/route.go | 6 +++--- dispatch/route_test.go | 18 +++++++++--------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/config/config.go b/config/config.go index 98d3a61688..03fec65026 100644 --- a/config/config.go +++ b/config/config.go @@ -494,9 +494,9 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // A Route is a node that contains definitions of how to handle alerts. type Route struct { - Receiver string `yaml:"receiver,omitempty" json:"receiver,omitempty"` - GroupBy []model.LabelName `yaml:"group_by,omitempty" json:"group_by,omitempty"` - GroupByAll bool `yaml:"group_by_all,omitempty" json:"group_by_all,omitempty"` + Receiver string `yaml:"receiver,omitempty" json:"receiver,omitempty"` + GroupBy []model.LabelName `yaml:"group_by,omitempty" json:"group_by,omitempty"` + GroupByAll bool `yaml:"group_by_all,omitempty" json:"group_by_all,omitempty"` Match map[string]string `yaml:"match,omitempty" json:"match,omitempty"` MatchRE map[string]Regexp `yaml:"match_re,omitempty" json:"match_re,omitempty"` diff --git a/dispatch/dispatch_test.go b/dispatch/dispatch_test.go index 4cc91653c2..400a04a295 100644 --- a/dispatch/dispatch_test.go +++ b/dispatch/dispatch_test.go @@ -254,11 +254,11 @@ func TestGroupLabels(t *testing.T) { route := &Route{ RouteOpts: RouteOpts{ - GroupBy: map[model.LabelName]struct{}{ + GroupBy: map[model.LabelName]struct{}{ "a": struct{}{}, "b": struct{}{}, }, - GroupByAll: false, + GroupByAll: false, }, } @@ -287,8 +287,8 @@ func TestGroupByAllLabels(t *testing.T) { route := &Route{ RouteOpts: RouteOpts{ - GroupBy: map[model.LabelName]struct{}{}, - GroupByAll: true, + GroupBy: map[model.LabelName]struct{}{}, + GroupByAll: true, }, } diff --git a/dispatch/route.go b/dispatch/route.go index 4c050ceeaa..6c41f2277c 100644 --- a/dispatch/route.go +++ b/dispatch/route.go @@ -32,7 +32,7 @@ var DefaultRouteOpts = RouteOpts{ GroupInterval: 5 * time.Minute, RepeatInterval: 4 * time.Hour, GroupBy: map[model.LabelName]struct{}{}, - GroupByAll: false, + GroupByAll: false, } // A Route is a node that contains definitions of how to handle alerts. @@ -186,13 +186,13 @@ func (ro *RouteOpts) MarshalJSON() ([]byte, error) { v := struct { Receiver string `json:"receiver"` GroupBy model.LabelNames `json:"groupBy"` - GroupByAll bool `json:"groupByAll"` + GroupByAll bool `json:"groupByAll"` GroupWait time.Duration `json:"groupWait"` GroupInterval time.Duration `json:"groupInterval"` RepeatInterval time.Duration `json:"repeatInterval"` }{ Receiver: ro.Receiver, - GroupByAll: ro.GroupByAll, + GroupByAll: ro.GroupByAll, GroupWait: ro.GroupWait, GroupInterval: ro.GroupInterval, RepeatInterval: ro.RepeatInterval, diff --git a/dispatch/route_test.go b/dispatch/route_test.go index da5b115d7c..459a1a3158 100644 --- a/dispatch/route_test.go +++ b/dispatch/route_test.go @@ -111,7 +111,7 @@ routes: { Receiver: "notify-A", GroupBy: def.GroupBy, - GroupByAll: false, + GroupByAll: false, GroupWait: def.GroupWait, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -128,7 +128,7 @@ routes: { Receiver: "notify-A", GroupBy: def.GroupBy, - GroupByAll: false, + GroupByAll: false, GroupWait: def.GroupWait, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -144,7 +144,7 @@ routes: { Receiver: "notify-BC", GroupBy: lset("foo", "bar"), - GroupByAll: false, + GroupByAll: false, GroupWait: 2 * time.Minute, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -161,7 +161,7 @@ routes: { Receiver: "notify-testing", GroupBy: lset(), - GroupByAll: true, + GroupByAll: true, GroupWait: def.GroupWait, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -178,7 +178,7 @@ routes: { Receiver: "notify-productionA", GroupBy: def.GroupBy, - GroupByAll: false, + GroupByAll: false, GroupWait: 1 * time.Minute, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -186,7 +186,7 @@ routes: { Receiver: "notify-productionB", GroupBy: lset("job"), - GroupByAll: false, + GroupByAll: false, GroupWait: 30 * time.Second, GroupInterval: 5 * time.Minute, RepeatInterval: 1 * time.Hour, @@ -205,7 +205,7 @@ routes: { Receiver: "notify-def", GroupBy: lset("role"), - GroupByAll: false, + GroupByAll: false, GroupWait: def.GroupWait, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -222,7 +222,7 @@ routes: { Receiver: "notify-testing", GroupBy: lset("role"), - GroupByAll: false, + GroupByAll: false, GroupWait: def.GroupWait, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, @@ -240,7 +240,7 @@ routes: { Receiver: "notify-testing", GroupBy: lset("role"), - GroupByAll: false, + GroupByAll: false, GroupWait: 2 * time.Minute, GroupInterval: def.GroupInterval, RepeatInterval: def.RepeatInterval, From fe5d1f9a631cd6e37ffd34b84b705b44ac75ecd8 Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Wed, 17 Oct 2018 14:15:36 +0200 Subject: [PATCH 03/14] add testdata Signed-off-by: Kyryl Sablin --- config/testdata/conf.group-by-all.yml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 config/testdata/conf.group-by-all.yml diff --git a/config/testdata/conf.group-by-all.yml b/config/testdata/conf.group-by-all.yml new file mode 100644 index 0000000000..0576ace015 --- /dev/null +++ b/config/testdata/conf.group-by-all.yml @@ -0,0 +1,9 @@ +route: + group_by_all: true + group_wait: 30s + group_interval: 5m + repeat_interval: 3h + receiver: team-X + +receivers: +- name: 'team-X' From ace028ad9e343af1cebdc1a09c5d8633f950eae1 Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Mon, 22 Oct 2018 17:47:38 +0200 Subject: [PATCH 04/14] trigger build Signed-off-by: Kyryl Sablin --- config/testdata/conf.group-by-all.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/config/testdata/conf.group-by-all.yml b/config/testdata/conf.group-by-all.yml index 0576ace015..43a197fc9e 100644 --- a/config/testdata/conf.group-by-all.yml +++ b/config/testdata/conf.group-by-all.yml @@ -7,3 +7,4 @@ route: receivers: - name: 'team-X' + From 603898b6be1a171f2478194980bb03fa239a16f9 Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Fri, 2 Nov 2018 15:34:59 +0100 Subject: [PATCH 05/14] use ... as special label name to group be all Signed-off-by: Kyryl Sablin --- README.md | 6 ++--- config/config.go | 20 ++++++++++++---- config/config_test.go | 33 +++++++++++++++++++++++---- config/testdata/conf.good.yml | 5 ++-- config/testdata/conf.group-by-all.yml | 4 ++-- 5 files changed, 53 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 3ba446f8c3..b3a51eebad 100644 --- a/README.md +++ b/README.md @@ -70,11 +70,11 @@ route: # The labels by which incoming alerts are grouped together. For example, # multiple alerts coming in for cluster=A and alertname=LatencyHigh would # be batched into a single group. + # To aggregate by all labels use '...' instead of label names. + # The wildcard label should be used only alone. + # For example: group_by: [...] group_by: ['alertname', 'cluster'] - # if true all labels of alert will be used for grouping. If true then group_by should be empty. - group_by_all: false - # When a new group of alerts is created by an incoming alert, wait at # least 'group_wait' to send the initial notification. # This way ensures that you get multiple alerts for the same group that start diff --git a/config/config.go b/config/config.go index 03fec65026..bd8a7b0a42 100644 --- a/config/config.go +++ b/config/config.go @@ -494,9 +494,10 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // A Route is a node that contains definitions of how to handle alerts. type Route struct { - Receiver string `yaml:"receiver,omitempty" json:"receiver,omitempty"` - GroupBy []model.LabelName `yaml:"group_by,omitempty" json:"group_by,omitempty"` - GroupByAll bool `yaml:"group_by_all,omitempty" json:"group_by_all,omitempty"` + Receiver string `yaml:"receiver,omitempty" json:"receiver,omitempty"` + GroupBy []model.LabelName + GroupByStr []string `yaml:"group_by,omitempty" json:"group_by,omitempty"` + GroupByAll bool Match map[string]string `yaml:"match,omitempty" json:"match,omitempty"` MatchRE map[string]Regexp `yaml:"match_re,omitempty" json:"match_re,omitempty"` @@ -526,9 +527,20 @@ func (r *Route) UnmarshalYAML(unmarshal func(interface{}) error) error { return fmt.Errorf("invalid label name %q", k) } } + for _, l := range r.GroupByStr { + if l == "..." { + r.GroupByAll = true + } else { + labelName := model.LabelName(l) + if !labelName.IsValid() { + return fmt.Errorf("invalid label name %q in group_by list", l) + } + r.GroupBy = append(r.GroupBy, labelName) + } + } if len(r.GroupBy) > 0 && r.GroupByAll { - return fmt.Errorf("cannot have group_by_all is true and non-empty group_by") + return fmt.Errorf("cannot have wildcard group_by and other other labels at same time") } groupBy := map[model.LabelName]struct{}{} diff --git a/config/config_test.go b/config/config_test.go index 776f3e121b..98c854e602 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -144,18 +144,37 @@ receivers: } -func TestNonEmptyGroupByAndGroupByAl(t *testing.T) { +func TestWildcardGroupByWithOtherGroupByLabels(t *testing.T) { in := ` route: - group_by: ['alertname', 'cluster'] - group_by_all: true + group_by: ['alertname', 'cluster', '...'] + receiver: team-X-mails +receivers: +- name: 'team-X-mails' +` + _, err := Load(in) + + expected := "cannot have wildcard group_by and other other labels at same time" + + if err == nil { + t.Fatalf("no error returned, expected:\n%q", expected) + } + if err.Error() != expected { + t.Errorf("\nexpected:\n%q\ngot:\n%q", expected, err.Error()) + } +} +func TestGroupByInvalidLabel(t *testing.T) { + in := ` +route: + group_by: ['-invalid-'] + receiver: team-X-mails receivers: - name: 'team-X-mails' ` _, err := Load(in) - expected := "cannot have group_by_all is true and non-empty group_by" + expected := "invalid label name \"-invalid-\" in group_by list" if err == nil { t.Fatalf("no error returned, expected:\n%q", expected) @@ -163,6 +182,7 @@ receivers: if err.Error() != expected { t.Errorf("\nexpected:\n%q\ngot:\n%q", expected, err.Error()) } + } func TestRootRouteExists(t *testing.T) { @@ -469,6 +489,11 @@ func TestEmptyFieldsAndRegex(t *testing.T) { "cluster", "service", }, + GroupByStr: []string{ + "alertname", + "cluster", + "service", + }, GroupByAll: false, Routes: []*Route{ { diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index 32134b2751..6a68fa0e0b 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -22,10 +22,11 @@ route: # The labels by which incoming alerts are grouped together. For example, # multiple alerts coming in for cluster=A and alertname=LatencyHigh would # be batched into a single group. + # To aggregate by all labels use '...' instead of label names. + # The wildcard label should be used only alone. + # For example: group_by: [...] group_by: ['alertname', 'cluster', 'service'] - # If true, each unique set of labels will constitute new group. - group_by_all: false # When a new group of alerts is created by an incoming alert, wait at # least 'group_wait' to send the initial notification. diff --git a/config/testdata/conf.group-by-all.yml b/config/testdata/conf.group-by-all.yml index 43a197fc9e..12c8f3b9f5 100644 --- a/config/testdata/conf.group-by-all.yml +++ b/config/testdata/conf.group-by-all.yml @@ -1,5 +1,5 @@ route: - group_by_all: true + group_by: [...] group_wait: 30s group_interval: 5m repeat_interval: 3h @@ -7,4 +7,4 @@ route: receivers: - name: 'team-X' - + From e1ff914b7ff0c21ea1297a94ed9eeac6136558f2 Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Fri, 2 Nov 2018 15:40:45 +0100 Subject: [PATCH 06/14] fix docs and improve struct Signed-off-by: Kyryl Sablin --- config/config.go | 3 ++- doc/examples/simple.yml | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/config/config.go b/config/config.go index bd8a7b0a42..f0e3568539 100644 --- a/config/config.go +++ b/config/config.go @@ -495,8 +495,9 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // A Route is a node that contains definitions of how to handle alerts. type Route struct { Receiver string `yaml:"receiver,omitempty" json:"receiver,omitempty"` - GroupBy []model.LabelName + GroupByStr []string `yaml:"group_by,omitempty" json:"group_by,omitempty"` + GroupBy []model.LabelName GroupByAll bool Match map[string]string `yaml:"match,omitempty" json:"match,omitempty"` diff --git a/doc/examples/simple.yml b/doc/examples/simple.yml index efc27c9a1d..aa4bb12b48 100644 --- a/doc/examples/simple.yml +++ b/doc/examples/simple.yml @@ -18,11 +18,11 @@ route: # The labels by which incoming alerts are grouped together. For example, # multiple alerts coming in for cluster=A and alertname=LatencyHigh would # be batched into a single group. + # To aggregate by all labels use '...' instead of label names. + # The wildcard label should be used only alone. + # For example: group_by: [...] group_by: ['alertname', 'cluster', 'service'] - # if true all labels of alert will be used for grouping. If true then group_by should be empty. - group_by_all: false - # When a new group of alerts is created by an incoming alert, wait at # least 'group_wait' to send the initial notification. # This way ensures that you get multiple alerts for the same group that start From afdee73e845760397dd836ea8c4784c4993c4a4f Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Fri, 2 Nov 2018 15:42:14 +0100 Subject: [PATCH 07/14] fix style Signed-off-by: Kyryl Sablin --- config/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.go b/config/config.go index f0e3568539..0a3b8c9353 100644 --- a/config/config.go +++ b/config/config.go @@ -494,7 +494,7 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // A Route is a node that contains definitions of how to handle alerts. type Route struct { - Receiver string `yaml:"receiver,omitempty" json:"receiver,omitempty"` + Receiver string `yaml:"receiver,omitempty" json:"receiver,omitempty"` GroupByStr []string `yaml:"group_by,omitempty" json:"group_by,omitempty"` GroupBy []model.LabelName From bbdbb8c349fd14b3708d0521ea0bb66e5960132e Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Fri, 2 Nov 2018 15:49:13 +0100 Subject: [PATCH 08/14] add usage warning Signed-off-by: Kyryl Sablin --- README.md | 2 ++ config/testdata/conf.good.yml | 3 --- doc/examples/simple.yml | 2 ++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b3a51eebad..f50a5ecfb8 100644 --- a/README.md +++ b/README.md @@ -70,9 +70,11 @@ route: # The labels by which incoming alerts are grouped together. For example, # multiple alerts coming in for cluster=A and alertname=LatencyHigh would # be batched into a single group. + # # To aggregate by all labels use '...' instead of label names. # The wildcard label should be used only alone. # For example: group_by: [...] + # WARNING! This will create a separate alert for each unique label set, so use it wisely group_by: ['alertname', 'cluster'] # When a new group of alerts is created by an incoming alert, wait at diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index 6a68fa0e0b..b2bfaac60f 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -22,9 +22,6 @@ route: # The labels by which incoming alerts are grouped together. For example, # multiple alerts coming in for cluster=A and alertname=LatencyHigh would # be batched into a single group. - # To aggregate by all labels use '...' instead of label names. - # The wildcard label should be used only alone. - # For example: group_by: [...] group_by: ['alertname', 'cluster', 'service'] diff --git a/doc/examples/simple.yml b/doc/examples/simple.yml index aa4bb12b48..2621e2a73a 100644 --- a/doc/examples/simple.yml +++ b/doc/examples/simple.yml @@ -18,9 +18,11 @@ route: # The labels by which incoming alerts are grouped together. For example, # multiple alerts coming in for cluster=A and alertname=LatencyHigh would # be batched into a single group. + # # To aggregate by all labels use '...' instead of label names. # The wildcard label should be used only alone. # For example: group_by: [...] + # WARNING! This will create a separate alert for each unique label set, so use it wisely group_by: ['alertname', 'cluster', 'service'] # When a new group of alerts is created by an incoming alert, wait at From f1561a034b3bfe128797664079ec44de40411346 Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Fri, 2 Nov 2018 16:37:09 +0100 Subject: [PATCH 09/14] improve docs Signed-off-by: Kyryl Sablin --- README.md | 9 +++++---- doc/examples/simple.yml | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index f50a5ecfb8..c682e92246 100644 --- a/README.md +++ b/README.md @@ -71,10 +71,11 @@ route: # multiple alerts coming in for cluster=A and alertname=LatencyHigh would # be batched into a single group. # - # To aggregate by all labels use '...' instead of label names. - # The wildcard label should be used only alone. - # For example: group_by: [...] - # WARNING! This will create a separate alert for each unique label set, so use it wisely + # To aggregate by all possible labels use '...' as the sole label name. + # This effectively disables aggregation entirely, passing through all + # alerts as-is. This is unlikely to be what you want, unless you have + # a very low alert volume or your upstream notification system performs + # its own grouping. Example: group_by: [...] group_by: ['alertname', 'cluster'] # When a new group of alerts is created by an incoming alert, wait at diff --git a/doc/examples/simple.yml b/doc/examples/simple.yml index 2621e2a73a..921e718e70 100644 --- a/doc/examples/simple.yml +++ b/doc/examples/simple.yml @@ -19,10 +19,11 @@ route: # multiple alerts coming in for cluster=A and alertname=LatencyHigh would # be batched into a single group. # - # To aggregate by all labels use '...' instead of label names. - # The wildcard label should be used only alone. - # For example: group_by: [...] - # WARNING! This will create a separate alert for each unique label set, so use it wisely + # To aggregate by all possible labels use '...' as the sole label name. + # This effectively disables aggregation entirely, passing through all + # alerts as-is. This is unlikely to be what you want, unless you have + # a very low alert volume or your upstream notification system performs + # its own grouping. Example: group_by: [...] group_by: ['alertname', 'cluster', 'service'] # When a new group of alerts is created by an incoming alert, wait at From 533ce7fb9060cfff1683e69dadd70c2706e94bce Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Fri, 2 Nov 2018 16:48:49 +0100 Subject: [PATCH 10/14] fix source config in test Signed-off-by: Kyryl Sablin --- dispatch/route_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dispatch/route_test.go b/dispatch/route_test.go index 459a1a3158..e291b3f206 100644 --- a/dispatch/route_test.go +++ b/dispatch/route_test.go @@ -39,8 +39,7 @@ routes: env: 'testing' receiver: 'notify-testing' - group_by: [] - group_by_all: true + group_by: [...] - match: env: "production" From cf867cb8803e20e9dbe57e67616b80f6042b62fd Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Mon, 19 Nov 2018 17:18:56 +0100 Subject: [PATCH 11/14] remove space from README Signed-off-by: Kyryl Sablin --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c682e92246..281fa446c6 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ route: # a very low alert volume or your upstream notification system performs # its own grouping. Example: group_by: [...] group_by: ['alertname', 'cluster'] - + # When a new group of alerts is created by an incoming alert, wait at # least 'group_wait' to send the initial notification. # This way ensures that you get multiple alerts for the same group that start From a5623c1be4a7547bfe91e809169b23cf715ea6e8 Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Tue, 20 Nov 2018 10:03:20 +0100 Subject: [PATCH 12/14] fix grammar error Signed-off-by: Kyryl Sablin --- config/config.go | 2 +- config/config_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/config.go b/config/config.go index 0a3b8c9353..79ff29a9aa 100644 --- a/config/config.go +++ b/config/config.go @@ -541,7 +541,7 @@ func (r *Route) UnmarshalYAML(unmarshal func(interface{}) error) error { } if len(r.GroupBy) > 0 && r.GroupByAll { - return fmt.Errorf("cannot have wildcard group_by and other other labels at same time") + return fmt.Errorf("cannot have wildcard group_by and other other labels at the same time") } groupBy := map[model.LabelName]struct{}{} diff --git a/config/config_test.go b/config/config_test.go index 98c854e602..d0a83de7f8 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -154,7 +154,7 @@ receivers: ` _, err := Load(in) - expected := "cannot have wildcard group_by and other other labels at same time" + expected := "cannot have wildcard group_by and other other labels at the same time" if err == nil { t.Fatalf("no error returned, expected:\n%q", expected) From b7324d95b90645c8214c638362e2450221e71326 Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Wed, 21 Nov 2018 12:00:49 +0100 Subject: [PATCH 13/14] remove not needed newline Signed-off-by: Kyryl Sablin --- config/testdata/conf.good.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index b2bfaac60f..1fb59bb4c9 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -24,7 +24,6 @@ route: # be batched into a single group. group_by: ['alertname', 'cluster', 'service'] - # When a new group of alerts is created by an incoming alert, wait at # least 'group_wait' to send the initial notification. # This way ensures that you get multiple alerts for the same group that start From 219a401b28aca87a50cc0a09b0512ab4e165d1a0 Mon Sep 17 00:00:00 2001 From: Kyryl Sablin Date: Thu, 22 Nov 2018 14:53:23 +0100 Subject: [PATCH 14/14] improve error message and polish comments Signed-off-by: Kyryl Sablin --- config/config.go | 2 +- config/config_test.go | 2 +- dispatch/route.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/config.go b/config/config.go index 79ff29a9aa..549d96485f 100644 --- a/config/config.go +++ b/config/config.go @@ -541,7 +541,7 @@ func (r *Route) UnmarshalYAML(unmarshal func(interface{}) error) error { } if len(r.GroupBy) > 0 && r.GroupByAll { - return fmt.Errorf("cannot have wildcard group_by and other other labels at the same time") + return fmt.Errorf("cannot have wildcard group_by (`...`) and other other labels at the same time") } groupBy := map[model.LabelName]struct{}{} diff --git a/config/config_test.go b/config/config_test.go index d0a83de7f8..dd73f37dd7 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -154,7 +154,7 @@ receivers: ` _, err := Load(in) - expected := "cannot have wildcard group_by and other other labels at the same time" + expected := "cannot have wildcard group_by (`...`) and other other labels at the same time" if err == nil { t.Fatalf("no error returned, expected:\n%q", expected) diff --git a/dispatch/route.go b/dispatch/route.go index 6c41f2277c..928d641a1e 100644 --- a/dispatch/route.go +++ b/dispatch/route.go @@ -162,7 +162,7 @@ type RouteOpts struct { // What labels to group alerts by for notifications. GroupBy map[model.LabelName]struct{} - // Use all alert's labels to group + // Use all alert labels to group. GroupByAll bool // How long to wait to group matching alerts before sending