diff --git a/config/config.go b/config/config.go index c20ae47486..3fa3e9e8e0 100644 --- a/config/config.go +++ b/config/config.go @@ -39,6 +39,29 @@ func (c Config) String() string { // Validate checks an entire parsed Config for the validity of its fields. func (c Config) Validate() error { + ncNames := map[string]bool{} + for _, nc := range c.NotificationConfig { + if nc.Name == nil { + return fmt.Errorf("Missing name in notification config: %s", proto.MarshalTextString(nc)) + } + for _, pdc := range nc.PagerdutyConfig { + if pdc.ServiceKey == nil { + return fmt.Errorf("Missing service key in PagerDuty notification config: %s", proto.MarshalTextString(pdc)) + } + } + for _, ec := range nc.EmailConfig { + if ec.Email == nil { + return fmt.Errorf("Missing email address in email notification config: %s", proto.MarshalTextString(ec)) + } + } + + if _, ok := ncNames[nc.GetName()]; ok { + return fmt.Errorf("Notification config name not unique: %s", nc.GetName()) + } + + ncNames[nc.GetName()] = true + } + for _, a := range c.AggregationRule { for _, f := range a.Filter { if f.NameRe == nil { @@ -48,7 +71,12 @@ func (c Config) Validate() error { return fmt.Errorf("Missing value pattern (value_re) in filter definition: %s", proto.MarshalTextString(f)) } } + + if _, ok := ncNames[a.GetNotificationConfigName()]; !ok { + return fmt.Errorf("No such notification config: %s", a.GetNotificationConfigName()) + } } + return nil } @@ -61,8 +89,9 @@ func (c Config) AggregationRules() manager.AggregationRules { filters = append(filters, manager.NewFilter(filter.GetNameRe(), filter.GetValueRe())) } rules = append(rules, &manager.AggregationRule{ - Filters: filters, - RepeatRate: time.Duration(r.GetRepeatRateSeconds()) * time.Second, + Filters: filters, + RepeatRate: time.Duration(r.GetRepeatRateSeconds()) * time.Second, + NotificationConfigName: r.GetNotificationConfigName(), }) } return rules diff --git a/config/config.proto b/config/config.proto index 87aa84f5d5..1efb986d8a 100644 
--- a/config/config.proto +++ b/config/config.proto @@ -13,6 +13,29 @@ package io.prometheus.alert_manager; +// Configuration for notification via PagerDuty. +message PagerDutyConfig { + // PagerDuty service key, see: + // http://developer.pagerduty.com/documentation/integration/events + optional string service_key = 1; +} + +// Configuration for notification via mail. +message EmailConfig { + // Email address to notify. + optional string email = 1; +} + +// Notification configuration definition. +message NotificationConfig { + // Name of this NotificationConfig. Referenced from AggregationRule. + optional string name = 1; + // Zero or more PagerDuty notification configurations. + repeated PagerDutyConfig pagerduty_config = 2; + // Zero or more email notification configurations. + repeated EmailConfig email_config = 3; +} + // A regex-based label filter used in aggregations. message Filter { // The regex matching the label name. @@ -21,11 +44,21 @@ message Filter { optional string value_re = 2; } +// Grouping and notification setting definitions for alerts. message AggregationRule { + // Filters that define which alerts are matched by this AggregationRule. repeated Filter filter = 1; + // How many seconds to wait before resending a notification for a specific alert. optional int32 repeat_rate_seconds = 2 [default = 7200]; + // Notification configuration to use for this AggregationRule, referenced by + // their name. + optional string notification_config_name = 3; } +// Global alert manager configuration. message AlertManagerConfig { + // Aggregation rule definitions. repeated AggregationRule aggregation_rule = 1; + // Notification configuration definitions. 
+ repeated NotificationConfig notification_config = 2; } diff --git a/config/config_test.go b/config/config_test.go index 803d7e118e..01497d04b1 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -48,13 +48,25 @@ func TestConfigs(t *testing.T) { }, { inputFile: "sample.conf.input", }, { - inputFile: "missing_name_re.conf.input", + inputFile: "missing_filter_name_re.conf.input", shouldFail: true, errContains: "Missing name pattern", }, { inputFile: "invalid_proto_format.conf.input", shouldFail: true, errContains: "unknown field name", + }, { + inputFile: "duplicate_nc_name.conf.input", + shouldFail: true, + errContains: "not unique", + }, { + inputFile: "nonexistent_nc_name.conf.input", + shouldFail: true, + errContains: "No such notification config", + }, { + inputFile: "missing_nc_name.conf.input", + shouldFail: true, + errContains: "Missing name", }, } diff --git a/config/fixtures/duplicate_nc_name.conf.input b/config/fixtures/duplicate_nc_name.conf.input new file mode 100644 index 0000000000..3ba8c9e964 --- /dev/null +++ b/config/fixtures/duplicate_nc_name.conf.input @@ -0,0 +1,28 @@ +notification_config { + name: "alertmanager_test" + pagerduty_config { + service_key: "supersecretapikey" + } + email_config { + email: "test@testservice.org" + } +} + +notification_config { + name: "alertmanager_test" + pagerduty_config { + service_key: "supersecretapikey" + } + email_config { + email: "test@testservice.org" + } +} + +aggregation_rule { + filter { + name_re: "service" + value_re: "test" + } + repeat_rate_seconds: 3600 + notification_config_name: "alertmanager_test" +} diff --git a/config/fixtures/missing_filter_name_re.conf.input b/config/fixtures/missing_filter_name_re.conf.input new file mode 100644 index 0000000000..2926f9e4a0 --- /dev/null +++ b/config/fixtures/missing_filter_name_re.conf.input @@ -0,0 +1,6 @@ +aggregation_rule { + filter { + value_re: "test" + } + repeat_rate_seconds: 3600 +} diff --git 
a/config/fixtures/missing_nc_name.conf.input b/config/fixtures/missing_nc_name.conf.input new file mode 100644 index 0000000000..e80bb89bbd --- /dev/null +++ b/config/fixtures/missing_nc_name.conf.input @@ -0,0 +1,27 @@ +notification_config { + pagerduty_config { + service_key: "supersecretapikey" + } + email_config { + email: "test@testservice.org" + } +} + +notification_config { + name: "alertmanager_test" + pagerduty_config { + service_key: "supersecretapikey" + } + email_config { + email: "test@testservice.org" + } +} + +aggregation_rule { + filter { + name_re: "service" + value_re: "test" + } + repeat_rate_seconds: 3600 + notification_config_name: "alertmanager_test" +} diff --git a/config/fixtures/nonexistent_nc_name.conf.input b/config/fixtures/nonexistent_nc_name.conf.input new file mode 100644 index 0000000000..09b0b7d75a --- /dev/null +++ b/config/fixtures/nonexistent_nc_name.conf.input @@ -0,0 +1,18 @@ +notification_config { + name: "alertmanager_test" + pagerduty_config { + service_key: "supersecretapikey" + } + email_config { + email: "test@testservice.org" + } +} + +aggregation_rule { + filter { + name_re: "service" + value_re: "test" + } + repeat_rate_seconds: 3600 + notification_config_name: "alertmanager_test2" +} diff --git a/config/fixtures/sample.conf.input b/config/fixtures/sample.conf.input index 4922f502a8..fcf5119d5f 100644 --- a/config/fixtures/sample.conf.input +++ b/config/fixtures/sample.conf.input @@ -1,13 +1,11 @@ -aggregation_rule { - filter { - name_re: "service" - value_re: "discovery" +notification_config { + name: "alertmanager_test" + pagerduty_config { + service_key: "supersecretapikey" } - filter { - name_re: "zone" - value_re: "aa" + email_config { + email: "test@testservice.org" } - repeat_rate_seconds: 3600 } aggregation_rule { @@ -15,4 +13,6 @@ aggregation_rule { name_re: "service" value_re: "test" } + repeat_rate_seconds: 3600 + notification_config_name: "alertmanager_test" } diff --git a/config/generated/config.pb.go 
b/config/generated/config.pb.go index ea62880093..267ccd3a24 100644 --- a/config/generated/config.pb.go +++ b/config/generated/config.pb.go @@ -13,6 +13,70 @@ var _ = proto.Marshal var _ = &json.SyntaxError{} var _ = math.Inf +type PagerDutyConfig struct { + ServiceKey *string `protobuf:"bytes,1,opt,name=service_key" json:"service_key,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *PagerDutyConfig) Reset() { *m = PagerDutyConfig{} } +func (m *PagerDutyConfig) String() string { return proto.CompactTextString(m) } +func (*PagerDutyConfig) ProtoMessage() {} + +func (m *PagerDutyConfig) GetServiceKey() string { + if m != nil && m.ServiceKey != nil { + return *m.ServiceKey + } + return "" +} + +type EmailConfig struct { + Email *string `protobuf:"bytes,1,opt,name=email" json:"email,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *EmailConfig) Reset() { *m = EmailConfig{} } +func (m *EmailConfig) String() string { return proto.CompactTextString(m) } +func (*EmailConfig) ProtoMessage() {} + +func (m *EmailConfig) GetEmail() string { + if m != nil && m.Email != nil { + return *m.Email + } + return "" +} + +type NotificationConfig struct { + Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + PagerdutyConfig []*PagerDutyConfig `protobuf:"bytes,2,rep,name=pagerduty_config" json:"pagerduty_config,omitempty"` + EmailConfig []*EmailConfig `protobuf:"bytes,3,rep,name=email_config" json:"email_config,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *NotificationConfig) Reset() { *m = NotificationConfig{} } +func (m *NotificationConfig) String() string { return proto.CompactTextString(m) } +func (*NotificationConfig) ProtoMessage() {} + +func (m *NotificationConfig) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func (m *NotificationConfig) GetPagerdutyConfig() []*PagerDutyConfig { + if m != nil { + return m.PagerdutyConfig + } + return nil +} + +func (m 
*NotificationConfig) GetEmailConfig() []*EmailConfig { + if m != nil { + return m.EmailConfig + } + return nil +} + type Filter struct { NameRe *string `protobuf:"bytes,1,opt,name=name_re" json:"name_re,omitempty"` ValueRe *string `protobuf:"bytes,2,opt,name=value_re" json:"value_re,omitempty"` @@ -38,9 +102,10 @@ func (m *Filter) GetValueRe() string { } type AggregationRule struct { - Filter []*Filter `protobuf:"bytes,1,rep,name=filter" json:"filter,omitempty"` - RepeatRateSeconds *int32 `protobuf:"varint,2,opt,name=repeat_rate_seconds,def=7200" json:"repeat_rate_seconds,omitempty"` - XXX_unrecognized []byte `json:"-"` + Filter []*Filter `protobuf:"bytes,1,rep,name=filter" json:"filter,omitempty"` + RepeatRateSeconds *int32 `protobuf:"varint,2,opt,name=repeat_rate_seconds,def=7200" json:"repeat_rate_seconds,omitempty"` + NotificationConfigName *string `protobuf:"bytes,3,opt,name=notification_config_name" json:"notification_config_name,omitempty"` + XXX_unrecognized []byte `json:"-"` } func (m *AggregationRule) Reset() { *m = AggregationRule{} } @@ -63,9 +128,17 @@ func (m *AggregationRule) GetRepeatRateSeconds() int32 { return Default_AggregationRule_RepeatRateSeconds } +func (m *AggregationRule) GetNotificationConfigName() string { + if m != nil && m.NotificationConfigName != nil { + return *m.NotificationConfigName + } + return "" +} + type AlertManagerConfig struct { - AggregationRule []*AggregationRule `protobuf:"bytes,1,rep,name=aggregation_rule" json:"aggregation_rule,omitempty"` - XXX_unrecognized []byte `json:"-"` + AggregationRule []*AggregationRule `protobuf:"bytes,1,rep,name=aggregation_rule" json:"aggregation_rule,omitempty"` + NotificationConfig []*NotificationConfig `protobuf:"bytes,2,rep,name=notification_config" json:"notification_config,omitempty"` + XXX_unrecognized []byte `json:"-"` } func (m *AlertManagerConfig) Reset() { *m = AlertManagerConfig{} } @@ -79,5 +152,12 @@ func (m *AlertManagerConfig) GetAggregationRule() []*AggregationRule { 
return nil } +func (m *AlertManagerConfig) GetNotificationConfig() []*NotificationConfig { + if m != nil { + return m.NotificationConfig + } + return nil +} + func init() { } diff --git a/main.go b/main.go index f7c5263335..c37a603c5c 100644 --- a/main.go +++ b/main.go @@ -38,9 +38,10 @@ func main() { suppressor := manager.NewSuppressor() defer suppressor.Close() - summarizer := manager.NewSummaryDispatcher() + notifier := manager.NewNotifier(conf.NotificationConfig) + defer notifier.Close() - aggregator := manager.NewAggregator(summarizer) + aggregator := manager.NewAggregator(notifier) defer aggregator.Close() webService := &web.WebService{ @@ -52,7 +53,8 @@ func main() { // Template-based page handlers. AlertsHandler: &web.AlertsHandler{ - Aggregator: aggregator, + Aggregator: aggregator, + IsInhibitedInterrogator: suppressor, }, SilencesHandler: &web.SilencesHandler{ Suppressor: suppressor, @@ -60,8 +62,8 @@ func main() { } go webService.ServeForever() - aggregator.SetRules(conf.AggregationRules()) + aggregator.SetRules(conf.AggregationRules()) log.Println("Running summary dispatcher...") - summarizer.Dispatch(suppressor) + notifier.Dispatch(suppressor) } diff --git a/manager/aggregator.go b/manager/aggregator.go index a15428dbbb..b44e00b442 100644 --- a/manager/aggregator.go +++ b/manager/aggregator.go @@ -28,9 +28,9 @@ const ( // AggregationRule creates and manages the scope for received events. 
type AggregationRule struct { - Filters Filters - - RepeatRate time.Duration + Filters Filters + RepeatRate time.Duration + NotificationConfigName string } type AggregationInstances []*AggregationInstance @@ -65,28 +65,26 @@ func (r *AggregationInstance) Ingest(e *Event) { r.expiryTimer.Reset(minimumRefreshPeriod) } -func (r *AggregationInstance) SendNotification(s SummaryReceiver) { +func (r *AggregationInstance) SendNotification(n Notifier) { if time.Since(r.lastNotificationSent) < r.Rule.RepeatRate { return } - err := s.Receive(&EventSummary{ - Rule: r.Rule, - Event: r.Event, - }) + err := n.QueueNotification(r.Event, r.Rule.NotificationConfigName) if err != nil { + // BUG: Limit the number of retries. log.Printf("Error while sending notification: %s, retrying in %v", err, notificationRetryPeriod) - r.resendNotificationAfter(notificationRetryPeriod, s) + r.resendNotificationAfter(notificationRetryPeriod, n) return } - r.resendNotificationAfter(r.Rule.RepeatRate, s) + r.resendNotificationAfter(r.Rule.RepeatRate, n) r.lastNotificationSent = time.Now() } -func (r *AggregationInstance) resendNotificationAfter(d time.Duration, s SummaryReceiver) { +func (r *AggregationInstance) resendNotificationAfter(d time.Duration, n Notifier) { r.notificationResendTimer = time.AfterFunc(d, func() { - r.SendNotification(s) + r.SendNotification(n) }) } @@ -102,18 +100,18 @@ func (r *AggregationInstance) Close() { type AggregationRules []*AggregationRule type Aggregator struct { - Rules AggregationRules - Aggregates map[EventFingerprint]*AggregationInstance - SummaryReceiver SummaryReceiver + Rules AggregationRules + Aggregates map[EventFingerprint]*AggregationInstance + Notifier Notifier // Mutex to protect the above. 
mu sync.Mutex } -func NewAggregator(s SummaryReceiver) *Aggregator { +func NewAggregator(n Notifier) *Aggregator { return &Aggregator{ - Aggregates: make(map[EventFingerprint]*AggregationInstance), - SummaryReceiver: s, + Aggregates: make(map[EventFingerprint]*AggregationInstance), + Notifier: n, } } @@ -153,7 +151,7 @@ func (a *Aggregator) Receive(events Events) error { } aggregation.Ingest(e) - aggregation.SendNotification(a.SummaryReceiver) + aggregation.SendNotification(a.Notifier) break } } diff --git a/manager/aggregator_test.go b/manager/aggregator_test.go index b13b9582f9..a878943b3b 100644 --- a/manager/aggregator_test.go +++ b/manager/aggregator_test.go @@ -15,14 +15,22 @@ package manager import ( "testing" + + pb "github.com/prometheus/alert_manager/config/generated" ) -type dummyReceiver struct{} +type dummyNotifier struct{} -func (d *dummyReceiver) Receive(*EventSummary) RemoteError { +func (d *dummyNotifier) QueueNotification(*Event, string) error { return nil } +func (d *dummyNotifier) SetNotificationConfigs([]*pb.NotificationConfig) {} + +func (d *dummyNotifier) Dispatch(IsInhibitedInterrogator) {} + +func (d *dummyNotifier) Close() {} + type testAggregatorScenario struct { rules AggregationRules inMatch Events @@ -30,7 +38,7 @@ type testAggregatorScenario struct { } func (s *testAggregatorScenario) test(i int, t *testing.T) { - a := NewAggregator(&dummyReceiver{}) + a := NewAggregator(&dummyNotifier{}) a.SetRules(s.rules) if len(s.inMatch) > 0 { diff --git a/manager/dispatcher.go b/manager/dispatcher.go deleted file mode 100644 index 5028ada494..0000000000 --- a/manager/dispatcher.go +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2013 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package manager - -import ( - "strings" -) - -type DestinationDispatcher interface { - Send(*EventSummary) error -} - -func DispatcherFor(destination string) DestinationDispatcher { - switch { - case strings.HasPrefix(destination, "IRC"): - case strings.HasPrefix(destination, "TRELLO"): - case strings.HasPrefix(destination, "MAIL"): - case strings.HasPrefix(destination, "PAGERDUTY"): - } - return nil -} - -type EventSummary struct { - Rule *AggregationRule - - Event *Event - - Destination string -} - -type SummaryDispatcher struct { - summaryReqs chan *summaryDispatchRequest - - closed chan bool -} - -type summaryDispatchRequest struct { - Summary *EventSummary - - Response chan *summaryDispatchResponse -} - -type Disposition int - -const ( - UNHANDLED Disposition = iota - DISPATCHED - SUPPRESSED -) - -type summaryDispatchResponse struct { - Disposition Disposition - Err RemoteError -} - -func (s *SummaryDispatcher) Close() { - close(s.summaryReqs) - <-s.closed -} - -func NewSummaryDispatcher() *SummaryDispatcher { - return &SummaryDispatcher{ - summaryReqs: make(chan *summaryDispatchRequest), - closed: make(chan bool), - } -} - -type RemoteError interface { - error - - Retryable() bool -} - -type remoteError struct { - error - - retryable bool -} - -func (e *remoteError) Retryable() bool { - return e.retryable -} - -func NewRemoteError(err error, retryable bool) RemoteError { - return &remoteError{ - err, - retryable, - } -} - -type SummaryReceiver interface { - Receive(*EventSummary) RemoteError -} - -func (d *SummaryDispatcher) Receive(s 
*EventSummary) RemoteError { - req := &summaryDispatchRequest{ - Summary: s, - Response: make(chan *summaryDispatchResponse), - } - - d.summaryReqs <- req - resp := <-req.Response - - return resp.Err -} - -func (d *SummaryDispatcher) dispatchSummary(r *summaryDispatchRequest, i IsInhibitedInterrogator) { - if inhibited, _ := i.IsInhibited(r.Summary.Event); inhibited { - r.Response <- &summaryDispatchResponse{ - Disposition: SUPPRESSED, - } - return - } - - // BUG: Perform sending of summaries. - r.Response <- &summaryDispatchResponse{ - Disposition: DISPATCHED, - } -} - -func (d *SummaryDispatcher) Dispatch(i IsInhibitedInterrogator) { - for req := range d.summaryReqs { - d.dispatchSummary(req, i) - } - - d.closed <- true -} diff --git a/manager/event.go b/manager/event.go index 8d309c463a..8dc73e482d 100644 --- a/manager/event.go +++ b/manager/event.go @@ -19,7 +19,7 @@ import ( "sort" ) -const eventNameLabel = "name" +const EventNameLabel = "alertname" type EventFingerprint uint64 @@ -28,16 +28,19 @@ type EventPayload map[string]string // Event models an action triggered by Prometheus. type Event struct { + // Short summary of event. + Summary string + // Long description of event. + Description string // Label value pairs for purpose of aggregation, matching, and disposition - // dispatching. This must minimally include a "name" label. + // dispatching. This must minimally include an "alertname" label. Labels EventLabels // Extra key/value information which is not used for aggregation. Payload EventPayload } func (e Event) Name() string { - // BUG: ensure in a proper place that all events have a name? 
- return e.Labels[eventNameLabel] + return e.Labels[EventNameLabel] } func (e Event) Fingerprint() EventFingerprint { @@ -51,8 +54,9 @@ func (e Event) Fingerprint() EventFingerprint { summer := fnv.New64a() + separator := string([]byte{0}) for _, k := range keys { - fmt.Fprintf(summer, k, e.Labels[k]) + fmt.Fprintf(summer, "%s%s%s%s", k, separator, e.Labels[k], separator) } return EventFingerprint(summer.Sum64()) diff --git a/manager/notifier.go b/manager/notifier.go new file mode 100644 index 0000000000..bc8359267c --- /dev/null +++ b/manager/notifier.go @@ -0,0 +1,173 @@ +// Copyright 2013 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package manager + +import ( + "bytes" + "encoding/json" + "flag" + "fmt" + "io/ioutil" + "log" + "net/http" + "sync" + + pb "github.com/prometheus/alert_manager/config/generated" +) + +const contentTypeJson = "application/json" + +var ( + notificationBufferSize = flag.Int("notificationBufferSize", 1000, "Size of buffer for pending notifications.") + pagerdutyApiUrl = flag.String("pagerdutyApiUrl", "https://events.pagerduty.com/generic/2010-04-15/create_event.json", "PagerDuty API URL.") +) + +// A Notifier is responsible for sending notifications for events according to +// a provided notification configuration. +type Notifier interface { + // Queue a notification for asynchronous dispatching. + QueueNotification(e *Event, configName string) error + // Replace current notification configs. 
Already enqueued messages will remain + // unaffected. + SetNotificationConfigs([]*pb.NotificationConfig) + // Start event notification dispatch loop. + Dispatch(IsInhibitedInterrogator) + // Stop the event notification dispatch loop. + Close() +} + +// Request for sending a notification. +type notificationReq struct { + event *Event + notificationConfig *pb.NotificationConfig +} + +// Alert notification multiplexer and dispatcher. +type notifier struct { + // Notifications that are queued to be sent. + pendingNotifications chan *notificationReq + + // Mutex to protect the fields below. + mu sync.Mutex + // Map of notification configs by name. + notificationConfigs map[string]*pb.NotificationConfig +} + +// Construct a new notifier. +func NewNotifier(configs []*pb.NotificationConfig) *notifier { + notifier := &notifier{ + pendingNotifications: make(chan *notificationReq, *notificationBufferSize), + } + notifier.SetNotificationConfigs(configs) + return notifier +} + +func (n *notifier) SetNotificationConfigs(configs []*pb.NotificationConfig) { + n.mu.Lock() + defer n.mu.Unlock() + + n.notificationConfigs = map[string]*pb.NotificationConfig{} + for _, c := range configs { + n.notificationConfigs[c.GetName()] = c + } +} + +func (n *notifier) QueueNotification(event *Event, configName string) error { + n.mu.Lock() + nc, ok := n.notificationConfigs[configName] + n.mu.Unlock() + + if !ok { + return fmt.Errorf("No such notification configuration %s", configName) + } + + // We need to save a reference to the notification config in the + // notificationReq since the config might be replaced or gone at the time the + // message gets dispatched. 
+ n.pendingNotifications <- &notificationReq{ + event: event, + notificationConfig: nc, + } + return nil +} + +func (n *notifier) sendPagerDutyNotification(serviceKey string, event *Event) error { + // http://developer.pagerduty.com/documentation/integration/events/trigger + incidentKey := event.Fingerprint() + buf, err := json.Marshal(map[string]interface{}{ + "service_key": serviceKey, + "event_type": "trigger", + "description": event.Description, + "incident_key": incidentKey, + "details": map[string]interface{}{ + "grouping_labels": event.Labels, + "extra_labels": event.Payload, + }, + }) + if err != nil { + return err + } + + resp, err := http.Post( + *pagerdutyApiUrl, + contentTypeJson, + bytes.NewBuffer(buf), + ) + if err != nil { + return err + } + defer resp.Body.Close() + + respBuf, err := ioutil.ReadAll(resp.Body) + if err != nil { + return err + } + + log.Printf("Sent PagerDuty notification: %v: HTTP %d: %s", incidentKey, resp.StatusCode, respBuf) + // BUG: Check response for result of operation. + return nil +} + +func (n *notifier) sendEmailNotification(email string, event *Event) error { + // BUG: Implement email notifications. 
+ log.Printf("Would send email notification for event %s to %s\n", event, email) + return nil +} + +func (n *notifier) handleNotification(event *Event, config *pb.NotificationConfig, i IsInhibitedInterrogator) { + if inhibited, _ := i.IsInhibited(event); inhibited { + return + } + + for _, pdConfig := range config.PagerdutyConfig { + if err := n.sendPagerDutyNotification(pdConfig.GetServiceKey(), event); err != nil { + log.Printf("Error sending PagerDuty notification: %s", err) + } + } + for _, emailConfig := range config.EmailConfig { + if err := n.sendEmailNotification(emailConfig.GetEmail(), event); err != nil { + log.Printf("Error sending email notification: %s", err) + } + } +} + +func (n *notifier) Dispatch(i IsInhibitedInterrogator) { + for req := range n.pendingNotifications { + n.handleNotification(req.event, req.notificationConfig, i) + } +} + +func (n *notifier) Close() { + close(n.pendingNotifications) +} diff --git a/web/alerts.go b/web/alerts.go index 123e291c7d..6e9c173258 100644 --- a/web/alerts.go +++ b/web/alerts.go @@ -21,15 +21,23 @@ import ( type AlertStatus struct { AlertAggregates []*manager.AggregationInstance + SilenceForEvent func(*manager.Event) *manager.Suppression } type AlertsHandler struct { - Aggregator *manager.Aggregator + Aggregator *manager.Aggregator + IsInhibitedInterrogator manager.IsInhibitedInterrogator +} + +func (h *AlertsHandler) silenceForEvent(e *manager.Event) *manager.Suppression { + _, silence := h.IsInhibitedInterrogator.IsInhibited(e) + return silence } func (h *AlertsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { alertStatus := &AlertStatus{ AlertAggregates: h.Aggregator.AlertAggregates(), + SilenceForEvent: h.silenceForEvent, } executeTemplate(w, "alerts", alertStatus) } diff --git a/web/api/event.go b/web/api/event.go index 509f085d0c..90fa94884e 100644 --- a/web/api/event.go +++ b/web/api/event.go @@ -14,14 +14,31 @@ package api import ( + "log" "net/http" 
"github.com/prometheus/alert_manager/manager" ) func (s AlertManagerService) AddEvents(es manager.Events) { + for i, ev := range es { + if ev.Summary == "" || ev.Description == "" { + log.Printf("Missing field in event %d: %s", i, ev) + rb := s.ResponseBuilder() + rb.SetResponseCode(http.StatusBadRequest) + return + } + if _, ok := ev.Labels[manager.EventNameLabel]; !ok { + log.Printf("Missing alert name label in event %d: %s", i, ev) + rb := s.ResponseBuilder() + rb.SetResponseCode(http.StatusBadRequest) + return + } + } + err := s.Aggregator.Receive(es) if err != nil { + log.Println("Error during aggregation:", err) rb := s.ResponseBuilder() rb.SetResponseCode(http.StatusServiceUnavailable) } diff --git a/web/helpers.go b/web/helpers.go index ac5b6659ab..00631e9a44 100644 --- a/web/helpers.go +++ b/web/helpers.go @@ -15,6 +15,7 @@ package web import ( "html/template" + "reflect" "time" ) @@ -22,6 +23,31 @@ func timeSince(t time.Time) string { return time.Now().Round(time.Second / 10).Sub(t.Round(time.Second / 10)).String() } +// By Russ Cox, https://groups.google.com/d/msg/golang-nuts/OEdSDgEC7js/iyhU9DW_IKcJ. 
+func eq(args ...interface{}) bool { + if len(args) == 0 { + return false + } + x := args[0] + switch x := x.(type) { + case string, int, int64, byte, float32, float64: + for _, y := range args[1:] { + if x == y { + return true + } + } + return false + } + + for _, y := range args[1:] { + if reflect.DeepEqual(x, y) { + return true + } + } + return false +} + var webHelpers = template.FuncMap{ "timeSince": timeSince, + "eq": eq, } diff --git a/web/static/js/alerts.js b/web/static/js/alerts.js index 925aa1104b..f61e3f619b 100644 --- a/web/static/js/alerts.js +++ b/web/static/js/alerts.js @@ -14,8 +14,8 @@ function addSilenceLabel(label, value) { } $("#silence_filters_table").append( '' + - ' ' + - ' ' + + ' ' + + ' ' + ' ' + ''); bindDelLabel(); @@ -195,6 +195,10 @@ function init() { $(".del_silence_btn").click(function() { deleteSilence(silenceId, silenceRow); }); + + $(".silence_link").click(function() { + alert("Linking to silence not yet supported"); + }); } $(init); diff --git a/web/templates/alerts.html b/web/templates/alerts.html index 315afcb331..9200b27cf3 100644 --- a/web/templates/alerts.html +++ b/web/templates/alerts.html @@ -17,23 +17,26 @@

Alerts

Labels Active Since Last Refreshed - Surpressed + Silenced + {{$silenceForEvent := .SilenceForEvent}} {{range .AlertAggregates}} {{index .Event.Name}}
- - + + Silence Alert
{{range $label, $value := .Event.Labels}} - {{$label}}="{{$value}}" + {{if not (eq $label "alertname")}} + {{$label}}="{{$value}}" + {{end}} {{end}}
{{range $label, $value := .Event.Labels}} @@ -45,7 +48,14 @@

Alerts

{{timeSince .Created}} ago {{timeSince .LastRefreshed}} ago - No + + {{$silence := call $silenceForEvent .Event}} + {{if $silence}} + by silence {{$silence.Id}} + {{else}} + not silenced + {{end}} + {{end}} diff --git a/web/web.go b/web/web.go index f2f919de9f..79abc7dc9f 100644 --- a/web/web.go +++ b/web/web.go @@ -63,7 +63,7 @@ func (w WebService) ServeForever() error { exp.Handle("/status", w.StatusHandler) exp.Handle("/api/", compressionHandler{handler: gorest.Handle()}) - exp.Handle("/metrics.json", prometheus.DefaultHandler) + exp.Handle("/metrics", prometheus.DefaultHandler) if *useLocalAssets { exp.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.Dir("web/static")))) } else {