From 380f407e8bcbfe2c213dd2fa7838fefc0af760bd Mon Sep 17 00:00:00 2001 From: Jacob Lisi Date: Tue, 11 Feb 2020 23:49:09 -0500 Subject: [PATCH 1/7] add per-tenant alertmanager metrics Signed-off-by: Jacob Lisi --- CHANGELOG.md | 1 + go.sum | 3 + pkg/alertmanager/alertmanager.go | 66 ++- pkg/alertmanager/alertmanager_metrics.go | 210 +++++++++ pkg/alertmanager/alertmanager_metrics_test.go | 421 ++++++++++++++++++ pkg/alertmanager/multitenant.go | 32 +- pkg/alertmanager/multitenant_test.go | 31 +- pkg/cortex/modules.go | 2 +- pkg/util/metrics_helper.go | 18 + pkg/util/metrics_helper_test.go | 81 ++++ 10 files changed, 842 insertions(+), 23 deletions(-) create mode 100644 pkg/alertmanager/alertmanager_metrics.go create mode 100644 pkg/alertmanager/alertmanager_metrics_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 1103805f88e..278ae55f652 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ * [FEATURE] Add /config HTTP endpoint which exposes the current Cortex configuration as YAML. #2165 * [FEATURE] Allow Prometheus remote write directly to ingesters. #1491 * [FEATURE] Add flag `-experimental.tsdb.stripe-size` to expose TSDB stripe size option. #2185 +* [ENHANCEMENT] Alertmanager: Expose Per-tenant alertmanager metrics #2124 * [ENHANCEMENT] Add `status` label to `cortex_alertmanager_configs` metric to gauge the number of valid and invalid configs. #2125 * [ENHANCEMENT] Cassandra Authentication: added the `custom_authenticators` config option that allows users to authenticate with cassandra clusters using password authenticators that are not approved by default in [gocql](https://github.com/gocql/gocql/blob/81b8263d9fe526782a588ef94d3fa5c6148e5d67/conn.go#L27) #2093 * [ENHANCEMENT] Experimental TSDB: Export TSDB Syncer metrics from Compactor component, they are prefixed with `cortex_compactor_`. 
#2023 diff --git a/go.sum b/go.sum index 87e824fcd96..cbc4a88eb4b 100644 --- a/go.sum +++ b/go.sum @@ -225,6 +225,7 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo github.com/fsouza/fake-gcs-server v1.7.0 h1:Un0BXUXrRWYSmYyC1Rqm2e2WJfTPyDy/HGMz31emTi8= github.com/fsouza/fake-gcs-server v1.7.0/go.mod h1:5XIRs4YvwNbNoz+1JF8j6KLAyDh7RHGAyAK3EP2EsNk= github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/globalsign/mgo v0.0.0-20180905125535-1ca0a4f7cbcb/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= @@ -361,6 +362,7 @@ github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPg github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20190723021845-34ac40c74b70 h1:XTnP8fJpa4Kvpw2qARB4KS9izqxPS0Sd92cDlY3uk+w= github.com/google/pprof v0.0.0-20190723021845-34ac40c74b70/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -461,6 +463,7 @@ github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/influxdata/influxdb v1.7.7 h1:UvNzAPfBrKMENVbQ4mr4ccA9sW+W1Ihl0Yh1s0BiVAg= github.com/influxdata/influxdb v1.7.7/go.mod h1:qZna6X/4elxqT3yI9iZYdZrWWdeFOOprn86kgg4+IzY= github.com/jackc/fake v0.0.0-20150926172116-812a484cc733/go.mod h1:WrMFNQdiFJ80sQsxDoMokWK1W5TQtxBFNpzWTD84ibQ= github.com/jackc/pgx v3.2.0+incompatible/go.mod h1:0ZGrqGqkRlliWnWB4zKnWtjbSWbGkVEFm4TeybAXq+I= diff --git a/pkg/alertmanager/alertmanager.go b/pkg/alertmanager/alertmanager.go index 04f534c1388..cafb95ea38a 100644 --- a/pkg/alertmanager/alertmanager.go +++ b/pkg/alertmanager/alertmanager.go @@ -10,6 +10,7 @@ import ( "time" "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" "github.com/prometheus/alertmanager/api" "github.com/prometheus/alertmanager/cluster" "github.com/prometheus/alertmanager/config" @@ -66,6 +67,9 @@ type Alertmanager struct { wg sync.WaitGroup mux *http.ServeMux registry *prometheus.Registry + + activeMtx sync.Mutex + active bool } var webReload = make(chan chan error) @@ -81,17 +85,16 @@ func init() { } // New creates a new Alertmanager. 
-func New(cfg *Config) (*Alertmanager, error) { +func New(cfg *Config, reg *prometheus.Registry) (*Alertmanager, error) { am := &Alertmanager{ - cfg: cfg, - logger: log.With(cfg.Logger, "user", cfg.UserID), - stop: make(chan struct{}), + cfg: cfg, + logger: log.With(cfg.Logger, "user", cfg.UserID), + stop: make(chan struct{}), + active: false, + activeMtx: sync.Mutex{}, } - // TODO(cortex): Build a registry that can merge metrics from multiple users. - // For now, these metrics are ignored, as we can't register the same - // metric twice with a single registry. - am.registry = prometheus.NewRegistry() + am.registry = reg am.wg.Add(1) nflogID := fmt.Sprintf("nflog:%s", cfg.UserID) @@ -173,7 +176,12 @@ func clusterWait(p *cluster.Peer, timeout time.Duration) func() time.Duration { // ApplyConfig applies a new configuration to an Alertmanager. func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error { - templateFiles := make([]string, len(conf.Templates)) + // Ensure the alertmanager is set to active + am.activeMtx.Lock() + am.active = true + am.activeMtx.Unlock() + + templateFiles := make([]string, len(conf.Templates), len(conf.Templates)) if len(conf.Templates) > 0 { for i, t := range conf.Templates { templateFiles[i] = filepath.Join(am.cfg.DataDir, "templates", userID, t) @@ -236,9 +244,47 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error { return nil } +// Pause running jobs in the alertmanager that are able to be restarted and sets +// to inactive +func (am *Alertmanager) Pause() { + // Set to inactive + am.activeMtx.Lock() + am.active = false + am.activeMtx.Unlock() + + // Ensure inhibitor is set before being called + if am.inhibitor != nil { + am.inhibitor.Stop() + } + + // Ensure dispatcher is set before being called + if am.dispatcher != nil { + am.dispatcher.Stop() + } + + // Remove all of the active silences from the alertmanager + silences, _, err := am.silences.Query() + if err != nil { + level.Warn(am.logger).Log("msg", "unable to retrieve silences for removal", "err", err) + } + for _, si := range silences { + err = am.silences.Expire(si.Id) + if err != nil { + level.Warn(am.logger).Log("msg", "unable to remove silence", "err", err, "silence", si.Id) + } + } +} + // Stop stops the Alertmanager. func (am *Alertmanager) Stop() { - am.dispatcher.Stop() + if am.inhibitor != nil { + am.inhibitor.Stop() + } + + if am.dispatcher != nil { + am.dispatcher.Stop() + } + am.alerts.Close() close(am.stop) am.wg.Wait() diff --git a/pkg/alertmanager/alertmanager_metrics.go b/pkg/alertmanager/alertmanager_metrics.go new file mode 100644 index 00000000000..723f1355d74 --- /dev/null +++ b/pkg/alertmanager/alertmanager_metrics.go @@ -0,0 +1,210 @@ +package alertmanager + +import ( + "sync" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/cortexproject/cortex/pkg/util" +) + +// This struct aggregates metrics exported by Alertmanager +// and re-exports those aggregates as Cortex metrics. 
+type alertmanagerMetrics struct { + // Maps userID -> registry + regsMu sync.Mutex + regs map[string]*prometheus.Registry + + // exported metrics, gathered from Alertmanager API + alertsReceived *prometheus.Desc + alertsInvalid *prometheus.Desc + + // exported metrics, gathered from Alertmanager PipelineBuilder + numNotifications *prometheus.Desc + numFailedNotifications *prometheus.Desc + notificationLatencySeconds *prometheus.Desc + + // exported metrics, gathered from Alertmanager nflog + nflogGCDuration *prometheus.Desc + nflogSnapshotDuration *prometheus.Desc + nflogSnapshotSize *prometheus.Desc + nflogQueriesTotal *prometheus.Desc + nflogQueryErrorsTotal *prometheus.Desc + nflogQueryDuration *prometheus.Desc + nflogPropagatedMessagesTotal *prometheus.Desc + + // exported metrics, gathered from Alertmanager Marker + markerAlerts *prometheus.Desc + + // exported metrics, gathered from Alertmanager Silences + silencesGCDuration *prometheus.Desc + silencesSnapshotDuration *prometheus.Desc + silencesSnapshotSize *prometheus.Desc + silencesQueriesTotal *prometheus.Desc + silencesQueryErrorsTotal *prometheus.Desc + silencesQueryDuration *prometheus.Desc + silences *prometheus.Desc + silencesPropagatedMessagesTotal *prometheus.Desc +} + +func newAlertmanagerMetrics() *alertmanagerMetrics { + return &alertmanagerMetrics{ + regs: map[string]*prometheus.Registry{}, + regsMu: sync.Mutex{}, + alertsReceived: prometheus.NewDesc( + "cortex_alertmanager_alerts_received_total", + "The total number of received alerts.", + nil, nil), + alertsInvalid: prometheus.NewDesc( + "cortex_alertmanager_alerts_invalid_total", + "The total number of received alerts that were invalid.", + nil, nil), + numNotifications: prometheus.NewDesc( + "cortex_alertmanager_notifications_total", + "The total number of attempted notifications.", + []string{"user"}, nil), + numFailedNotifications: prometheus.NewDesc( + "cortex_alertmanager_notifications_failed_total", + "The total number of failed notifications.", + []string{"user"}, nil), + notificationLatencySeconds: prometheus.NewDesc( + "cortex_alertmanager_notification_latency_seconds", + "The latency of notifications in seconds.", + nil, nil), + nflogGCDuration: prometheus.NewDesc( + "cortex_alertmanager_nflog_gc_duration_seconds", + "Duration of the last notification log garbage collection cycle.", + nil, nil), + nflogSnapshotDuration: prometheus.NewDesc( + "cortex_alertmanager_nflog_snapshot_duration_seconds", + "Duration of the last notification log snapshot.", + nil, nil), + nflogSnapshotSize: prometheus.NewDesc( + "cortex_alertmanager_nflog_snapshot_size_bytes", + "Size of the last notification log snapshot in bytes.", + nil, nil), + nflogQueriesTotal: prometheus.NewDesc( + "cortex_alertmanager_nflog_queries_total", + "Number of notification log queries were received.", + nil, nil), + nflogQueryErrorsTotal: prometheus.NewDesc( + "cortex_alertmanager_nflog_query_errors_total", + "Number notification log received queries that failed.", + nil, nil), + nflogQueryDuration: prometheus.NewDesc( + "cortex_alertmanager_nflog_query_duration_seconds", + "Duration of notification log query evaluation.", + nil, nil), + nflogPropagatedMessagesTotal: prometheus.NewDesc( + "cortex_alertmanager_nflog_gossip_messages_propagated_total", + "Number of received gossip messages that have been further gossiped.", + nil, nil), + markerAlerts: prometheus.NewDesc( + "cortex_alertmanager_alerts", + "How many alerts by state.", + []string{"user", "state"}, nil), + silencesGCDuration: 
prometheus.NewDesc( + "cortex_alertmanager_silences_gc_duration_seconds", + "Duration of the last silence garbage collection cycle.", + nil, nil), + silencesSnapshotDuration: prometheus.NewDesc( + "cortex_alertmanager_silences_snapshot_duration_seconds", + "Duration of the last silence snapshot.", + nil, nil), + silencesSnapshotSize: prometheus.NewDesc( + "cortex_alertmanager_silences_snapshot_size_bytes", + "Size of the last silence snapshot in bytes.", + nil, nil), + silencesQueriesTotal: prometheus.NewDesc( + "cortex_alertmanager_silences_queries_total", + "How many silence queries were received.", + nil, nil), + silencesQueryErrorsTotal: prometheus.NewDesc( + "cortex_alertmanager_silences_query_errors_total", + "How many silence received queries did not succeed.", + nil, nil), + silencesQueryDuration: prometheus.NewDesc( + "cortex_alertmanager_silences_query_duration_seconds", + "Duration of silence query evaluation.", + nil, nil), + silencesPropagatedMessagesTotal: prometheus.NewDesc( + "cortex_alertmanager_silences_gossip_messages_propagated_total", + "Number of received gossip messages that have been further gossiped.", + nil, nil), + silences: prometheus.NewDesc( + "cortex_alertmanager_silences", + "How many silences by state.", + []string{"user", "state"}, nil), + } +} + +func (m *alertmanagerMetrics) addUserRegistry(user string, reg *prometheus.Registry) { + m.regsMu.Lock() + m.regs[user] = reg + m.regsMu.Unlock() +} + +func (m *alertmanagerMetrics) registries() map[string]*prometheus.Registry { + regs := map[string]*prometheus.Registry{} + + m.regsMu.Lock() + defer m.regsMu.Unlock() + for uid, r := range m.regs { + regs[uid] = r + } + + return regs +} + +func (m *alertmanagerMetrics) Describe(out chan<- *prometheus.Desc) { + out <- m.alertsReceived + out <- m.alertsInvalid + out <- m.numNotifications + out <- m.numFailedNotifications + out <- m.notificationLatencySeconds + out <- m.nflogGCDuration + out <- m.nflogSnapshotDuration + out <- m.nflogSnapshotSize + out <- m.nflogQueriesTotal + out <- m.nflogQueryErrorsTotal + out <- m.nflogQueryDuration + out <- m.nflogPropagatedMessagesTotal + out <- m.markerAlerts + out <- m.silencesGCDuration + out <- m.silencesSnapshotDuration + out <- m.silencesSnapshotSize + out <- m.silencesQueriesTotal + out <- m.silencesQueryErrorsTotal + out <- m.silencesQueryDuration + out <- m.silences + out <- m.silencesPropagatedMessagesTotal +} + +func (m *alertmanagerMetrics) Collect(out chan<- prometheus.Metric) { + data := util.BuildMetricFamiliesPerUserFromUserRegistries(m.registries()) + + data.SendSumOfCounters(out, m.alertsReceived, "alertmanager_alerts_received_total") + data.SendSumOfCounters(out, m.alertsInvalid, "alertmanager_alerts_invalid_total") + + data.SendSumOfCountersPerUser(out, m.numNotifications, "alertmanager_notifications_total") + data.SendSumOfCountersPerUser(out, m.numFailedNotifications, "alertmanager_notifications_failed_total") + data.SendSumOfHistograms(out, m.notificationLatencySeconds, "alertmanager_notification_latency_seconds") + data.SendSumOfGaugesPerUserWithLabels(out, m.markerAlerts, "alertmanager_alerts", "state") + + data.SendSumOfSummaries(out, m.nflogGCDuration, "alertmanager_nflog_gc_duration_seconds") + data.SendSumOfSummaries(out, m.nflogSnapshotDuration, "alertmanager_nflog_snapshot_duration_seconds") + data.SendSumOfGauges(out, m.nflogSnapshotSize, "alertmanager_nflog_snapshot_size_bytes") + data.SendSumOfCounters(out, m.nflogQueriesTotal, "alertmanager_nflog_queries_total") + data.SendSumOfCounters(out, 
m.nflogQueryErrorsTotal, "alertmanager_nflog_query_errors_total") + data.SendSumOfHistograms(out, m.nflogQueryDuration, "alertmanager_nflog_query_duration_seconds") + data.SendSumOfCounters(out, m.nflogPropagatedMessagesTotal, "alertmanager_nflog_gossip_messages_propagated_total") + + data.SendSumOfSummaries(out, m.silencesGCDuration, "alertmanager_silences_gc_duration_seconds") + data.SendSumOfSummaries(out, m.silencesSnapshotDuration, "alertmanager_silences_snapshot_duration_seconds") + data.SendSumOfGauges(out, m.silencesSnapshotSize, "alertmanager_silences_snapshot_size_bytes") + data.SendSumOfCounters(out, m.silencesQueriesTotal, "alertmanager_silences_queries_total") + data.SendSumOfCounters(out, m.silencesQueryErrorsTotal, "alertmanager_silences_query_errors_total") + data.SendSumOfHistograms(out, m.silencesQueryDuration, "alertmanager_silences_query_duration_seconds") + data.SendSumOfCounters(out, m.silencesPropagatedMessagesTotal, "alertmanager_silences_gossip_messages_propagated_total") + data.SendSumOfGaugesPerUserWithLabels(out, m.silences, "alertmanager_silences", "state") +} diff --git a/pkg/alertmanager/alertmanager_metrics_test.go b/pkg/alertmanager/alertmanager_metrics_test.go new file mode 100644 index 00000000000..bd893c646ba --- /dev/null +++ b/pkg/alertmanager/alertmanager_metrics_test.go @@ -0,0 +1,421 @@ +package alertmanager + +import ( + "bytes" + "testing" + + "github.com/prometheus/alertmanager/types" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/require" +) + +var integrations = []string{ + "email", + "hipchat", + "pagerduty", + "wechat", + "pushover", + "slack", + "opsgenie", + "webhook", + "victorops", +} + +func TestAlertmanagerMetricsStore(t *testing.T) { + mainReg := prometheus.NewPedanticRegistry() + + alertmanangerMetrics := newAlertmanagerMetrics() + mainReg.MustRegister(alertmanangerMetrics) + alertmanangerMetrics.addUserRegistry("user1", populateAlertmanager(1)) + alertmanangerMetrics.addUserRegistry("user2", populateAlertmanager(10)) + alertmanangerMetrics.addUserRegistry("user3", populateAlertmanager(100)) + + //noinspection ALL + err := testutil.GatherAndCompare(mainReg, bytes.NewBufferString(` + # HELP cortex_alertmanager_alerts How many alerts by state. + # TYPE cortex_alertmanager_alerts gauge + cortex_alertmanager_alerts{state="active",user="user1"} 1 + cortex_alertmanager_alerts{state="active",user="user2"} 10 + cortex_alertmanager_alerts{state="active",user="user3"} 100 + cortex_alertmanager_alerts{state="suppressed",user="user1"} 2 + cortex_alertmanager_alerts{state="suppressed",user="user2"} 20 + cortex_alertmanager_alerts{state="suppressed",user="user3"} 200 + # HELP cortex_alertmanager_alerts_invalid_total The total number of received alerts that were invalid. + # TYPE cortex_alertmanager_alerts_invalid_total counter + cortex_alertmanager_alerts_invalid_total 222 + # HELP cortex_alertmanager_alerts_received_total The total number of received alerts. + # TYPE cortex_alertmanager_alerts_received_total counter + cortex_alertmanager_alerts_received_total 1110 + # HELP cortex_alertmanager_nflog_gc_duration_seconds Duration of the last notification log garbage collection cycle. 
+ # TYPE cortex_alertmanager_nflog_gc_duration_seconds summary + cortex_alertmanager_nflog_gc_duration_seconds_sum 111 + cortex_alertmanager_nflog_gc_duration_seconds_count 3 + # HELP cortex_alertmanager_nflog_gossip_messages_propagated_total Number of received gossip messages that have been further gossiped. + # TYPE cortex_alertmanager_nflog_gossip_messages_propagated_total counter + cortex_alertmanager_nflog_gossip_messages_propagated_total 111 + # HELP cortex_alertmanager_nflog_queries_total Number of notification log queries were received. + # TYPE cortex_alertmanager_nflog_queries_total counter + cortex_alertmanager_nflog_queries_total 111 + # HELP cortex_alertmanager_nflog_query_duration_seconds Duration of notification log query evaluation. + # TYPE cortex_alertmanager_nflog_query_duration_seconds histogram + cortex_alertmanager_nflog_query_duration_seconds_bucket{le="0.005"} 0 + cortex_alertmanager_nflog_query_duration_seconds_bucket{le="0.01"} 0 + cortex_alertmanager_nflog_query_duration_seconds_bucket{le="0.025"} 0 + cortex_alertmanager_nflog_query_duration_seconds_bucket{le="0.05"} 0 + cortex_alertmanager_nflog_query_duration_seconds_bucket{le="0.1"} 0 + cortex_alertmanager_nflog_query_duration_seconds_bucket{le="0.25"} 0 + cortex_alertmanager_nflog_query_duration_seconds_bucket{le="0.5"} 0 + cortex_alertmanager_nflog_query_duration_seconds_bucket{le="1"} 1 + cortex_alertmanager_nflog_query_duration_seconds_bucket{le="2.5"} 1 + cortex_alertmanager_nflog_query_duration_seconds_bucket{le="5"} 1 + cortex_alertmanager_nflog_query_duration_seconds_bucket{le="10"} 2 + cortex_alertmanager_nflog_query_duration_seconds_bucket{le="+Inf"} 3 + cortex_alertmanager_nflog_query_duration_seconds_sum 111 + cortex_alertmanager_nflog_query_duration_seconds_count 3 + # HELP cortex_alertmanager_nflog_query_errors_total Number notification log received queries that failed. + # TYPE cortex_alertmanager_nflog_query_errors_total counter + cortex_alertmanager_nflog_query_errors_total 111 + # HELP cortex_alertmanager_nflog_snapshot_duration_seconds Duration of the last notification log snapshot. + # TYPE cortex_alertmanager_nflog_snapshot_duration_seconds summary + cortex_alertmanager_nflog_snapshot_duration_seconds_sum 111 + cortex_alertmanager_nflog_snapshot_duration_seconds_count 3 + # HELP cortex_alertmanager_nflog_snapshot_size_bytes Size of the last notification log snapshot in bytes. + # TYPE cortex_alertmanager_nflog_snapshot_size_bytes gauge + cortex_alertmanager_nflog_snapshot_size_bytes 111 + # HELP cortex_alertmanager_notification_latency_seconds The latency of notifications in seconds. + # TYPE cortex_alertmanager_notification_latency_seconds histogram + cortex_alertmanager_notification_latency_seconds_bucket{le="1"} 15 + cortex_alertmanager_notification_latency_seconds_bucket{le="5"} 21 + cortex_alertmanager_notification_latency_seconds_bucket{le="10"} 23 + cortex_alertmanager_notification_latency_seconds_bucket{le="15"} 25 + cortex_alertmanager_notification_latency_seconds_bucket{le="20"} 27 + cortex_alertmanager_notification_latency_seconds_bucket{le="+Inf"} 27 + cortex_alertmanager_notification_latency_seconds_sum 99.9 + cortex_alertmanager_notification_latency_seconds_count 27 + # HELP cortex_alertmanager_notifications_failed_total The total number of failed notifications. 
+ # TYPE cortex_alertmanager_notifications_failed_total counter + cortex_alertmanager_notifications_failed_total{user="user1"} 36 + cortex_alertmanager_notifications_failed_total{user="user2"} 360 + cortex_alertmanager_notifications_failed_total{user="user3"} 3600 + # HELP cortex_alertmanager_notifications_total The total number of attempted notifications. + # TYPE cortex_alertmanager_notifications_total counter + cortex_alertmanager_notifications_total{user="user1"} 36 + cortex_alertmanager_notifications_total{user="user2"} 360 + cortex_alertmanager_notifications_total{user="user3"} 3600 + # HELP cortex_alertmanager_silences How many silences by state. + # TYPE cortex_alertmanager_silences gauge + cortex_alertmanager_silences{state="active",user="user1"} 1 + cortex_alertmanager_silences{state="active",user="user2"} 10 + cortex_alertmanager_silences{state="active",user="user3"} 100 + cortex_alertmanager_silences{state="expired",user="user1"} 2 + cortex_alertmanager_silences{state="expired",user="user2"} 20 + cortex_alertmanager_silences{state="expired",user="user3"} 200 + cortex_alertmanager_silences{state="pending",user="user1"} 3 + cortex_alertmanager_silences{state="pending",user="user2"} 30 + cortex_alertmanager_silences{state="pending",user="user3"} 300 + # HELP cortex_alertmanager_silences_gc_duration_seconds Duration of the last silence garbage collection cycle. + # TYPE cortex_alertmanager_silences_gc_duration_seconds summary + cortex_alertmanager_silences_gc_duration_seconds_sum 111 + cortex_alertmanager_silences_gc_duration_seconds_count 3 + # HELP cortex_alertmanager_silences_gossip_messages_propagated_total Number of received gossip messages that have been further gossiped. + # TYPE cortex_alertmanager_silences_gossip_messages_propagated_total counter + cortex_alertmanager_silences_gossip_messages_propagated_total 111 + # HELP cortex_alertmanager_silences_queries_total How many silence queries were received. + # TYPE cortex_alertmanager_silences_queries_total counter + cortex_alertmanager_silences_queries_total 111 + # HELP cortex_alertmanager_silences_query_duration_seconds Duration of silence query evaluation. + # TYPE cortex_alertmanager_silences_query_duration_seconds histogram + cortex_alertmanager_silences_query_duration_seconds_bucket{le="0.005"} 0 + cortex_alertmanager_silences_query_duration_seconds_bucket{le="0.01"} 0 + cortex_alertmanager_silences_query_duration_seconds_bucket{le="0.025"} 0 + cortex_alertmanager_silences_query_duration_seconds_bucket{le="0.05"} 0 + cortex_alertmanager_silences_query_duration_seconds_bucket{le="0.1"} 0 + cortex_alertmanager_silences_query_duration_seconds_bucket{le="0.25"} 0 + cortex_alertmanager_silences_query_duration_seconds_bucket{le="0.5"} 0 + cortex_alertmanager_silences_query_duration_seconds_bucket{le="1"} 1 + cortex_alertmanager_silences_query_duration_seconds_bucket{le="2.5"} 1 + cortex_alertmanager_silences_query_duration_seconds_bucket{le="5"} 1 + cortex_alertmanager_silences_query_duration_seconds_bucket{le="10"} 2 + cortex_alertmanager_silences_query_duration_seconds_bucket{le="+Inf"} 3 + cortex_alertmanager_silences_query_duration_seconds_sum 111 + cortex_alertmanager_silences_query_duration_seconds_count 3 + # HELP cortex_alertmanager_silences_query_errors_total How many silence received queries did not succeed. 
+ # TYPE cortex_alertmanager_silences_query_errors_total counter + cortex_alertmanager_silences_query_errors_total 111 + # HELP cortex_alertmanager_silences_snapshot_duration_seconds Duration of the last silence snapshot. + # TYPE cortex_alertmanager_silences_snapshot_duration_seconds summary + cortex_alertmanager_silences_snapshot_duration_seconds_sum 111 + cortex_alertmanager_silences_snapshot_duration_seconds_count 3 + # HELP cortex_alertmanager_silences_snapshot_size_bytes Size of the last silence snapshot in bytes. + # TYPE cortex_alertmanager_silences_snapshot_size_bytes gauge + cortex_alertmanager_silences_snapshot_size_bytes 111 + +`)) + require.NoError(t, err) +} + +func populateAlertmanager(base float64) *prometheus.Registry { + reg := prometheus.NewRegistry() + s := newSilenceMetrics(reg) + s.gcDuration.Observe(base) + s.snapshotDuration.Observe(base) + s.snapshotSize.Add(base) + s.queriesTotal.Add(base) + s.queryErrorsTotal.Add(base) + s.queryDuration.Observe(base) + s.propagatedMessagesTotal.Add(base) + s.silencesActive.Set(base) + s.silencesExpired.Set(base * 2) + s.silencesPending.Set(base * 3) + + n := newNflogMetrics(reg) + n.gcDuration.Observe(base) + n.snapshotDuration.Observe(base) + n.snapshotSize.Add(base) + n.queriesTotal.Add(base) + n.queryErrorsTotal.Add(base) + n.queryDuration.Observe(base) + n.propagatedMessagesTotal.Add(base) + + nm := newNotifyMetrics(reg) + for i, integration := range integrations { + nm.numNotifications.WithLabelValues(integration).Add(base * float64(i)) + nm.numFailedNotifications.WithLabelValues(integration).Add(base * float64(i)) + nm.notificationLatencySeconds.WithLabelValues(integration).Observe(base * float64(i) * 0.025) + } + + m := newMarkerMetrics(reg) + m.alerts.WithLabelValues(string(types.AlertStateActive)).Add(base) + m.alerts.WithLabelValues(string(types.AlertStateSuppressed)).Add(base * 2) + + v1APIMetrics := newAPIMetrics("v1", reg) + v1APIMetrics.firing.Add(base * 2) + v1APIMetrics.invalid.Add(base) + v1APIMetrics.resolved.Add(base * 3) + + v2APIMetrics := newAPIMetrics("v2", reg) + v2APIMetrics.firing.Add(base * 2) + v2APIMetrics.invalid.Add(base) + v2APIMetrics.resolved.Add(base * 3) + + return reg +} + +// Copied from github.com/alertmanager/nflog/nflog.go +type nflogMetrics struct { + gcDuration prometheus.Summary + snapshotDuration prometheus.Summary + snapshotSize prometheus.Gauge + queriesTotal prometheus.Counter + queryErrorsTotal prometheus.Counter + queryDuration prometheus.Histogram + propagatedMessagesTotal prometheus.Counter +} + +func newNflogMetrics(r prometheus.Registerer) *nflogMetrics { + m := &nflogMetrics{} + + m.gcDuration = prometheus.NewSummary(prometheus.SummaryOpts{ + Name: "alertmanager_nflog_gc_duration_seconds", + Help: "Duration of the last notification log garbage collection cycle.", + Objectives: map[float64]float64{}, + }) + m.snapshotDuration = prometheus.NewSummary(prometheus.SummaryOpts{ + Name: "alertmanager_nflog_snapshot_duration_seconds", + Help: "Duration of the last notification log snapshot.", + Objectives: map[float64]float64{}, + }) + m.snapshotSize = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "alertmanager_nflog_snapshot_size_bytes", + Help: "Size of the last notification log snapshot in bytes.", + }) + m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "alertmanager_nflog_queries_total", + Help: "Number of notification log queries were received.", + }) + m.queryErrorsTotal = prometheus.NewCounter(prometheus.CounterOpts{ + Name: 
"alertmanager_nflog_query_errors_total", + Help: "Number notification log received queries that failed.", + }) + m.queryDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "alertmanager_nflog_query_duration_seconds", + Help: "Duration of notification log query evaluation.", + }) + m.propagatedMessagesTotal = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "alertmanager_nflog_gossip_messages_propagated_total", + Help: "Number of received gossip messages that have been further gossiped.", + }) + + if r != nil { + r.MustRegister( + m.gcDuration, + m.snapshotDuration, + m.snapshotSize, + m.queriesTotal, + m.queryErrorsTotal, + m.queryDuration, + m.propagatedMessagesTotal, + ) + } + return m +} + +// Copied from github.com/alertmanager/silence/silence.go +type silenceMetrics struct { + gcDuration prometheus.Summary + snapshotDuration prometheus.Summary + snapshotSize prometheus.Gauge + queriesTotal prometheus.Counter + queryErrorsTotal prometheus.Counter + queryDuration prometheus.Histogram + silencesActive prometheus.Gauge + silencesPending prometheus.Gauge + silencesExpired prometheus.Gauge + propagatedMessagesTotal prometheus.Counter +} + +func newSilenceMetrics(r prometheus.Registerer) *silenceMetrics { + m := &silenceMetrics{} + + m.gcDuration = prometheus.NewSummary(prometheus.SummaryOpts{ + Name: "alertmanager_silences_gc_duration_seconds", + Help: "Duration of the last silence garbage collection cycle.", + Objectives: map[float64]float64{}, + }) + m.snapshotDuration = prometheus.NewSummary(prometheus.SummaryOpts{ + Name: "alertmanager_silences_snapshot_duration_seconds", + Help: "Duration of the last silence snapshot.", + Objectives: map[float64]float64{}, + }) + m.snapshotSize = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "alertmanager_silences_snapshot_size_bytes", + Help: "Size of the last silence snapshot in bytes.", + }) + m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "alertmanager_silences_queries_total", + Help: "How many silence queries were received.", + }) + m.queryErrorsTotal = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "alertmanager_silences_query_errors_total", + Help: "How many silence received queries did not succeed.", + }) + m.queryDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "alertmanager_silences_query_duration_seconds", + Help: "Duration of silence query evaluation.", + }) + m.propagatedMessagesTotal = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "alertmanager_silences_gossip_messages_propagated_total", + Help: "Number of received gossip messages that have been further gossiped.", + }) + m.silencesActive = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "alertmanager_silences", + Help: "How many silences by state.", + ConstLabels: prometheus.Labels{"state": string(types.SilenceStateActive)}, + }) + m.silencesPending = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "alertmanager_silences", + Help: "How many silences by state.", + ConstLabels: prometheus.Labels{"state": string(types.SilenceStatePending)}, + }) + m.silencesExpired = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "alertmanager_silences", + Help: "How many silences by state.", + ConstLabels: prometheus.Labels{"state": string(types.SilenceStateExpired)}, + }) + + if r != nil { + r.MustRegister( + m.gcDuration, + m.snapshotDuration, + m.snapshotSize, + m.queriesTotal, + m.queryErrorsTotal, + m.queryDuration, + m.silencesActive, + m.silencesPending, + m.silencesExpired, + m.propagatedMessagesTotal, + ) + 
} + return m +} + +// Copied from github.com/alertmanager/notify/notify.go +type notifyMetrics struct { + numNotifications *prometheus.CounterVec + numFailedNotifications *prometheus.CounterVec + notificationLatencySeconds *prometheus.HistogramVec +} + +func newNotifyMetrics(r prometheus.Registerer) *notifyMetrics { + m := ¬ifyMetrics{ + numNotifications: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "alertmanager", + Name: "notifications_total", + Help: "The total number of attempted notifications.", + }, []string{"integration"}), + numFailedNotifications: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "alertmanager", + Name: "notifications_failed_total", + Help: "The total number of failed notifications.", + }, []string{"integration"}), + notificationLatencySeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "alertmanager", + Name: "notification_latency_seconds", + Help: "The latency of notifications in seconds.", + Buckets: []float64{1, 5, 10, 15, 20}, + }, []string{"integration"}), + } + for _, integration := range integrations { + m.numNotifications.WithLabelValues(integration) + m.numFailedNotifications.WithLabelValues(integration) + m.notificationLatencySeconds.WithLabelValues(integration) + } + r.MustRegister(m.numNotifications, m.numFailedNotifications, m.notificationLatencySeconds) + return m +} + +type markerMetrics struct { + alerts *prometheus.GaugeVec +} + +func newMarkerMetrics(r prometheus.Registerer) *markerMetrics { + m := &markerMetrics{ + alerts: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "alertmanager_alerts", + Help: "How many alerts by state.", + }, []string{"state"}), + } + + r.MustRegister(m.alerts) + return m +} + +// Copied from github.com/alertmanager/api/metrics/metrics.go +type apiMetrics struct { + firing prometheus.Counter + resolved prometheus.Counter + invalid prometheus.Counter +} + +func newAPIMetrics(version string, r prometheus.Registerer) *apiMetrics { + numReceivedAlerts := prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "alertmanager_alerts_received_total", + Help: "The total number of received alerts.", + ConstLabels: prometheus.Labels{"version": version}, + }, []string{"status"}) + numInvalidAlerts := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "alertmanager_alerts_invalid_total", + Help: "The total number of received alerts that were invalid.", + ConstLabels: prometheus.Labels{"version": version}, + }) + if r != nil { + r.MustRegister(numReceivedAlerts, numInvalidAlerts) + } + return &apiMetrics{ + firing: numReceivedAlerts.WithLabelValues("firing"), + resolved: numReceivedAlerts.WithLabelValues("resolved"), + invalid: numInvalidAlerts, + } +} diff --git a/pkg/alertmanager/multitenant.go b/pkg/alertmanager/multitenant.go index 60b8271a46e..58a573adab3 100644 --- a/pkg/alertmanager/multitenant.go +++ b/pkg/alertmanager/multitenant.go @@ -143,7 +143,8 @@ type MultitenantAlertmanager struct { alertmanagersMtx sync.Mutex alertmanagers map[string]*Alertmanager - logger log.Logger + logger log.Logger + metrics *alertmanagerMetrics peer *cluster.Peer @@ -152,7 +153,7 @@ type MultitenantAlertmanager struct { } // NewMultitenantAlertmanager creates a new MultitenantAlertmanager. 
-func NewMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, logger log.Logger) (*MultitenantAlertmanager, error) { +func NewMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, logger log.Logger, registerer prometheus.Registerer) (*MultitenantAlertmanager, error) { err := os.MkdirAll(cfg.DataDir, 0777) if err != nil { return nil, fmt.Errorf("unable to create Alertmanager data directory %q: %s", cfg.DataDir, err) @@ -174,7 +175,7 @@ func NewMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, logger log.L if cfg.ClusterBindAddr != "" { peer, err = cluster.Create( log.With(logger, "component", "cluster"), - prometheus.DefaultRegisterer, + registerer, cfg.ClusterBindAddr, cfg.ClusterAdvertiseAddr, cfg.Peers, @@ -205,12 +206,18 @@ func NewMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, logger log.L fallbackConfig: string(fallbackConfig), cfgs: map[string]alerts.AlertConfigDesc{}, alertmanagers: map[string]*Alertmanager{}, + metrics: newAlertmanagerMetrics(), peer: peer, store: store, logger: log.With(logger, "component", "MultiTenantAlertmanager"), stop: make(chan struct{}), done: make(chan struct{}), } + + if registerer != nil { + registerer.MustRegister(am.metrics) + } + return am, nil } @@ -300,15 +307,14 @@ func (am *MultitenantAlertmanager) syncConfigs(cfgs map[string]alerts.AlertConfi defer am.alertmanagersMtx.Unlock() for user, userAM := range am.alertmanagers { if _, exists := cfgs[user]; !exists { - level.Info(am.logger).Log("msg", "deleting per-tenant alertmanager", "user", user) - userAM.Stop() - delete(am.alertmanagers, user) + level.Info(am.logger).Log("msg", "deactivating per-tenant alertmanager", "user", user) + userAM.Pause() delete(am.cfgs, user) - level.Info(am.logger).Log("msg", "deleted per-tenant alertmanager", "user", user) + level.Info(am.logger).Log("msg", "deactivated per-tenant alertmanager", "user", user) } } totalConfigs.WithLabelValues("invalid").Set(float64(invalid)) - totalConfigs.WithLabelValues("valid").Set(float64(len(am.alertmanagers) - invalid)) + totalConfigs.WithLabelValues("valid").Set(float64(len(am.cfgs) - invalid)) } func (am *MultitenantAlertmanager) transformConfig(userID string, amConfig *amconfig.Config) (*amconfig.Config, error) { @@ -423,6 +429,8 @@ func (am *MultitenantAlertmanager) setConfig(cfg alerts.AlertConfigDesc) error { } func (am *MultitenantAlertmanager) newAlertmanager(userID string, amConfig *amconfig.Config) (*Alertmanager, error) { + reg := prometheus.NewRegistry() + am.metrics.addUserRegistry(userID, reg) newAM, err := New(&Config{ UserID: userID, DataDir: am.cfg.DataDir, @@ -431,7 +439,7 @@ func (am *MultitenantAlertmanager) newAlertmanager(userID string, amConfig *amco PeerTimeout: am.cfg.PeerTimeout, Retention: am.cfg.Retention, ExternalURL: am.cfg.ExternalURL.URL, - }) + }, reg) if err != nil { return nil, fmt.Errorf("unable to start Alertmanager for user %v: %v", userID, err) } @@ -452,7 +460,11 @@ func (am *MultitenantAlertmanager) ServeHTTP(w http.ResponseWriter, req *http.Re am.alertmanagersMtx.Lock() userAM, ok := am.alertmanagers[userID] am.alertmanagersMtx.Unlock() - if !ok { + + userAM.activeMtx.Lock() + defer userAM.activeMtx.Unlock() + + if !ok || !userAM.active { http.Error(w, fmt.Sprintf("no Alertmanager for this user ID"), http.StatusNotFound) return } diff --git a/pkg/alertmanager/multitenant_test.go b/pkg/alertmanager/multitenant_test.go index 708004fd6e6..d8cb8207ccd 100644 --- a/pkg/alertmanager/multitenant_test.go +++ b/pkg/alertmanager/multitenant_test.go @@ -77,6 +77,7 @@ 
func TestLoadAllConfigs(t *testing.T) { logger: log.NewNopLogger(), stop: make(chan struct{}), done: make(chan struct{}), + metrics: newAlertmanagerMetrics(), } // Ensure the configs are synced correctly @@ -112,10 +113,36 @@ func TestLoadAllConfigs(t *testing.T) { require.True(t, exists) require.Equal(t, simpleConfigTwo, currentConfig.RawConfig) - // Test Delete User + // Test Delete User, ensure the config is removed but the alertmanager + // exists and is set to inactive delete(mockStore.configs, "user3") require.NoError(t, am.updateConfigs()) currentConfig, exists = am.cfgs["user3"] require.False(t, exists) - require.Equal(t, alerts.AlertConfigDesc{}, currentConfig) + require.Equal(t, "", currentConfig.RawConfig) + + userAM, exists := am.alertmanagers["user3"] + require.True(t, exists) + userAM.activeMtx.Lock() + require.False(t, userAM.active) + userAM.activeMtx.Unlock() + + // Ensure when a 3rd config is re-added, it is synced correctly + mockStore.configs["user3"] = alerts.AlertConfigDesc{ + User: "user3", + RawConfig: simpleConfigOne, + Templates: []*alerts.TemplateDesc{}, + } + + require.NoError(t, am.updateConfigs()) + + currentConfig, exists = am.cfgs["user3"] + require.True(t, exists) + require.Equal(t, simpleConfigOne, currentConfig.RawConfig) + + userAM, exists = am.alertmanagers["user3"] + require.True(t, exists) + userAM.activeMtx.Lock() + require.True(t, userAM.active) + userAM.activeMtx.Unlock() } diff --git a/pkg/cortex/modules.go b/pkg/cortex/modules.go index 5a6d71c3dc6..ff7d4d65ec3 100644 --- a/pkg/cortex/modules.go +++ b/pkg/cortex/modules.go @@ -533,7 +533,7 @@ func (t *Cortex) stopConfigs() error { } func (t *Cortex) initAlertmanager(cfg *Config) (err error) { - t.alertmanager, err = alertmanager.NewMultitenantAlertmanager(&cfg.Alertmanager, util.Logger) + t.alertmanager, err = alertmanager.NewMultitenantAlertmanager(&cfg.Alertmanager, util.Logger, prometheus.DefaultRegisterer) if err != nil { return err } diff --git a/pkg/util/metrics_helper.go b/pkg/util/metrics_helper.go index 234b154ebd6..e934ab9c665 100644 --- a/pkg/util/metrics_helper.go +++ b/pkg/util/metrics_helper.go @@ -31,6 +31,13 @@ func (m singleValueWithLabelsMap) aggregateFn(labelsKey string, labelValues []st m[labelsKey] = r } +func (m singleValueWithLabelsMap) appendUserLabelValue(user string) { + for key, mlv := range m { + mlv.LabelValues = append([]string{user}, mlv.LabelValues...) + m[key] = mlv + } +} + func (m singleValueWithLabelsMap) WriteToMetricChannel(out chan<- prometheus.Metric, desc *prometheus.Desc, valueType prometheus.ValueType) { for _, cr := range m { out <- prometheus.MustNewConstMetric(desc, valueType, cr.Value, cr.LabelValues...) @@ -162,6 +169,17 @@ func (d MetricFamiliesPerUser) SendSumOfGaugesWithLabels(out chan<- prometheus.M d.sumOfSingleValuesWithLabels(gauge, gaugeValue, labelNames).WriteToMetricChannel(out, desc, prometheus.GaugeValue) } +// SendSumOfGaugesPerUserWithLabels provides metrics with the provided label names on a per-user basis.
This function assumes that `user` is the +// first label on the provided metric Desc +func (d MetricFamiliesPerUser) SendSumOfGaugesPerUserWithLabels(out chan<- prometheus.Metric, desc *prometheus.Desc, metric string, labelNames ...string) { + for user, userMetrics := range d { + result := singleValueWithLabelsMap{} + userMetrics.sumOfSingleValuesWithLabels(metric, labelNames, gaugeValue, result.aggregateFn) + result.appendUserLabelValue(user) + result.WriteToMetricChannel(out, desc, prometheus.GaugeValue) + } +} + func (d MetricFamiliesPerUser) sumOfSingleValuesWithLabels(metric string, fn func(*dto.Metric) float64, labelNames []string) singleValueWithLabelsMap { result := singleValueWithLabelsMap{} for _, userMetrics := range d { diff --git a/pkg/util/metrics_helper_test.go b/pkg/util/metrics_helper_test.go index 2b4b3f6ab81..aec2a348bbe 100644 --- a/pkg/util/metrics_helper_test.go +++ b/pkg/util/metrics_helper_test.go @@ -4,6 +4,7 @@ import ( "testing" "github.com/gogo/protobuf/proto" + "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/stretchr/testify/require" ) @@ -81,3 +82,83 @@ func makeLabels(namesAndValues ...string) []*dto.LabelPair { return out } + +// TestSendSumOfGaugesPerUserWithLabels tests to ensure multiple metrics for the same user with a matching label are +// summed correctly +func TestSendSumOfGaugesPerUserWithLabels(t *testing.T) { + user1Metric := prometheus.NewGaugeVec(prometheus.GaugeOpts{Name: "test_metric"}, []string{"label_one", "label_two"}) + user2Metric := prometheus.NewGaugeVec(prometheus.GaugeOpts{Name: "test_metric"}, []string{"label_one", "label_two"}) + user1Metric.WithLabelValues("a", "b").Set(100) + user1Metric.WithLabelValues("a", "c").Set(80) + user2Metric.WithLabelValues("a", "b").Set(60) + user2Metric.WithLabelValues("a", "c").Set(40) + + user1Reg := prometheus.NewRegistry() + user2Reg := prometheus.NewRegistry() + user1Reg.MustRegister(user1Metric) + user2Reg.MustRegister(user2Metric) + + mf := BuildMetricFamiliesPerUserFromUserRegistries(map[string]*prometheus.Registry{ + "user-1": user1Reg, + "user-2": user2Reg, + }) + + desc := prometheus.NewDesc("test_metric", "", []string{"user", "label_one"}, nil) + actual, err := collectMetrics(func(out chan prometheus.Metric) { + mf.SendSumOfGaugesPerUserWithLabels(out, desc, "test_metric", "label_one") + }) + require.NoError(t, err) + expected := []*dto.Metric{ + {Label: makeLabels("label_one", "a", "user", "user-1"), Gauge: &dto.Gauge{Value: proto.Float64(180)}}, + {Label: makeLabels("label_one", "a", "user", "user-2"), Gauge: &dto.Gauge{Value: proto.Float64(100)}}, + } + require.ElementsMatch(t, expected, actual) + + desc = prometheus.NewDesc("test_metric", "", []string{"user", "label_two"}, nil) + actual, err = collectMetrics(func(out chan prometheus.Metric) { + mf.SendSumOfGaugesPerUserWithLabels(out, desc, "test_metric", "label_two") + }) + require.NoError(t, err) + expected = []*dto.Metric{ + {Label: makeLabels("label_two", "b", "user", "user-1"), Gauge: &dto.Gauge{Value: proto.Float64(100)}}, + {Label: makeLabels("label_two", "c", "user", "user-1"), Gauge: &dto.Gauge{Value: proto.Float64(80)}}, + {Label: makeLabels("label_two", "b", "user", "user-2"), Gauge: &dto.Gauge{Value: proto.Float64(60)}}, + {Label: makeLabels("label_two", "c", "user", "user-2"), Gauge: &dto.Gauge{Value: proto.Float64(40)}}, + } + require.ElementsMatch(t, expected, actual) + + desc = prometheus.NewDesc("test_metric", "", []string{"user", "label_one", "label_two"}, nil) + 
actual, err = collectMetrics(func(out chan prometheus.Metric) { + mf.SendSumOfGaugesPerUserWithLabels(out, desc, "test_metric", "label_one", "label_two") + }) + require.NoError(t, err) + expected = []*dto.Metric{ + {Label: makeLabels("label_one", "a", "label_two", "b", "user", "user-1"), Gauge: &dto.Gauge{Value: proto.Float64(100)}}, + {Label: makeLabels("label_one", "a", "label_two", "c", "user", "user-1"), Gauge: &dto.Gauge{Value: proto.Float64(80)}}, + {Label: makeLabels("label_one", "a", "label_two", "b", "user", "user-2"), Gauge: &dto.Gauge{Value: proto.Float64(60)}}, + {Label: makeLabels("label_one", "a", "label_two", "c", "user", "user-2"), Gauge: &dto.Gauge{Value: proto.Float64(40)}}, + } + require.ElementsMatch(t, expected, actual) +} + +func collectMetrics(send func(out chan prometheus.Metric)) ([]*dto.Metric, error) { + out := make(chan prometheus.Metric) + + go func() { + send(out) + close(out) + }() + + metrics := []*dto.Metric{} + for m := range out { + collected := &dto.Metric{} + err := m.Write(collected) + if err != nil { + return nil, err + } + + metrics = append(metrics, collected) + } + + return metrics, nil +} From 3bece6ca6bba5994099fcf07ab89192b8ef78aa8 Mon Sep 17 00:00:00 2001 From: Jacob Lisi Date: Thu, 27 Feb 2020 10:48:05 -0500 Subject: [PATCH 2/7] fix linting error Signed-off-by: Jacob Lisi --- pkg/alertmanager/alertmanager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/alertmanager/alertmanager.go b/pkg/alertmanager/alertmanager.go index cafb95ea38a..95ce35787f5 100644 --- a/pkg/alertmanager/alertmanager.go +++ b/pkg/alertmanager/alertmanager.go @@ -181,7 +181,7 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error { am.active = true am.activeMtx.Unlock() - templateFiles := make([]string, len(conf.Templates), len(conf.Templates)) + templateFiles := make([]string, len(conf.Templates)) if len(conf.Templates) > 0 { for i, t := range conf.Templates { templateFiles[i] = filepath.Join(am.cfg.DataDir, "templates", userID, t) From 20cfea6ac6d92fb54b5ebc169f8c60bca97b5c57 Mon Sep 17 00:00:00 2001 From: Jacob Lisi Date: Fri, 28 Feb 2020 12:01:19 -0500 Subject: [PATCH 3/7] update in regards to PR comments Signed-off-by: Jacob Lisi --- pkg/alertmanager/alertmanager.go | 25 ++++++++++++++++--------- pkg/alertmanager/multitenant.go | 7 ++++++- pkg/alertmanager/multitenant_test.go | 8 ++------ pkg/util/metrics_helper.go | 4 ++-- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/pkg/alertmanager/alertmanager.go b/pkg/alertmanager/alertmanager.go index 95ce35787f5..3851a3afcb6 100644 --- a/pkg/alertmanager/alertmanager.go +++ b/pkg/alertmanager/alertmanager.go @@ -176,11 +176,6 @@ func clusterWait(p *cluster.Peer, timeout time.Duration) func() time.Duration { // ApplyConfig applies a new configuration to an Alertmanager. 
func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error { - // Ensure the alertmanager is set to active - am.activeMtx.Lock() - am.active = true - am.activeMtx.Unlock() - templateFiles := make([]string, len(conf.Templates)) if len(conf.Templates) > 0 { for i, t := range conf.Templates { @@ -241,25 +236,37 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error { go am.dispatcher.Run() go am.inhibitor.Run() + // Ensure the alertmanager is set to active + am.activeMtx.Lock() + am.active = true + am.activeMtx.Unlock() + return nil } +func (am *Alertmanager) isActive() bool { + am.activeMtx.Lock() + defer am.activeMtx.Unlock() + return am.active +} + // Pause running jobs in the alertmanager that are able to be restarted and sets -// to inactive +// the state to inactive func (am *Alertmanager) Pause() { // Set to inactive am.activeMtx.Lock() am.active = false am.activeMtx.Unlock() - // Ensure inhibitor is set before being called + // Stop the inhibitor and dispatcher which will be recreated when + // a new config is applied if am.inhibitor != nil { am.inhibitor.Stop() + am.inhibitor = nil } - - // Ensure dispatcher is set before being called if am.dispatcher != nil { am.dispatcher.Stop() + am.dispatcher = nil } // Remove all of the active silences from the alertmanager diff --git a/pkg/alertmanager/multitenant.go b/pkg/alertmanager/multitenant.go index 58a573adab3..93c4089db54 100644 --- a/pkg/alertmanager/multitenant.go +++ b/pkg/alertmanager/multitenant.go @@ -307,6 +307,9 @@ func (am *MultitenantAlertmanager) syncConfigs(cfgs map[string]alerts.AlertConfi defer am.alertmanagersMtx.Unlock() for user, userAM := range am.alertmanagers { if _, exists := cfgs[user]; !exists { + // The user alertmanager is only paused in order to retain the prometheus metrics + // it has reported to it's registry. If a new config for this user appears, this structure + // will be reused.
level.Info(am.logger).Log("msg", "deactivating per-tenant alertmanager", "user", user) userAM.Pause() delete(am.cfgs, user) @@ -447,6 +450,8 @@ func (am *MultitenantAlertmanager) newAlertmanager(userID string, amConfig *amco if err := newAM.ApplyConfig(userID, amConfig); err != nil { return nil, fmt.Errorf("unable to apply initial config for user %v: %v", userID, err) } + + am.metrics.addUserRegistry(userID, reg) return newAM, nil } @@ -464,7 +469,7 @@ func (am *MultitenantAlertmanager) ServeHTTP(w http.ResponseWriter, req *http.Re userAM.activeMtx.Lock() defer userAM.activeMtx.Unlock() - if !ok || !userAM.active { + if !ok || !userAM.isActive() { http.Error(w, fmt.Sprintf("no Alertmanager for this user ID"), http.StatusNotFound) return } diff --git a/pkg/alertmanager/multitenant_test.go b/pkg/alertmanager/multitenant_test.go index d8cb8207ccd..f38c35beaa5 100644 --- a/pkg/alertmanager/multitenant_test.go +++ b/pkg/alertmanager/multitenant_test.go @@ -123,9 +123,7 @@ func TestLoadAllConfigs(t *testing.T) { userAM, exists := am.alertmanagers["user3"] require.True(t, exists) - userAM.activeMtx.Lock() - require.False(t, userAM.active) - userAM.activeMtx.Unlock() + require.False(t, userAM.isActive()) // Ensure when a 3rd config is re-added, it is synced correctly mockStore.configs["user3"] = alerts.AlertConfigDesc{ @@ -142,7 +140,5 @@ func TestLoadAllConfigs(t *testing.T) { userAM, exists = am.alertmanagers["user3"] require.True(t, exists) - userAM.activeMtx.Lock() - require.True(t, userAM.active) - userAM.activeMtx.Unlock() + require.True(t, userAM.isActive()) } diff --git a/pkg/util/metrics_helper.go b/pkg/util/metrics_helper.go index e934ab9c665..47cc0d30f2d 100644 --- a/pkg/util/metrics_helper.go +++ b/pkg/util/metrics_helper.go @@ -31,7 +31,7 @@ func (m singleValueWithLabelsMap) aggregateFn(labelsKey string, labelValues []st m[labelsKey] = r } -func (m singleValueWithLabelsMap) appendUserLabelValue(user string) { +func (m singleValueWithLabelsMap) prependUserLabelValue(user string) { for key, mlv := range m { mlv.LabelValues = append([]string{user}, mlv.LabelValues...) 
m[key] = mlv @@ -175,7 +175,7 @@ func (d MetricFamiliesPerUser) SendSumOfGaugesPerUserWithLabels(out chan<- prome for user, userMetrics := range d { result := singleValueWithLabelsMap{} userMetrics.sumOfSingleValuesWithLabels(metric, labelNames, gaugeValue, result.aggregateFn) - result.appendUserLabelValue(user) + result.prependUserLabelValue(user) result.WriteToMetricChannel(out, desc, prometheus.GaugeValue) } } From ef6b4ff718104adb0293dbe0e218a0538f1a55aa Mon Sep 17 00:00:00 2001 From: Jacob Lisi Date: Sun, 1 Mar 2020 14:02:46 -0500 Subject: [PATCH 4/7] fix double lock Signed-off-by: Jacob Lisi --- pkg/alertmanager/multitenant.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/pkg/alertmanager/multitenant.go b/pkg/alertmanager/multitenant.go index 93c4089db54..e26073541a4 100644 --- a/pkg/alertmanager/multitenant.go +++ b/pkg/alertmanager/multitenant.go @@ -466,9 +466,6 @@ func (am *MultitenantAlertmanager) ServeHTTP(w http.ResponseWriter, req *http.Re userAM, ok := am.alertmanagers[userID] am.alertmanagersMtx.Unlock() - userAM.activeMtx.Lock() - defer userAM.activeMtx.Unlock() - if !ok || !userAM.isActive() { http.Error(w, fmt.Sprintf("no Alertmanager for this user ID"), http.StatusNotFound) return From 87bf3d41a4b2d66f04b49e44fc37809a5c520ebf Mon Sep 17 00:00:00 2001 From: Jacob Lisi Date: Sun, 1 Mar 2020 14:23:20 -0500 Subject: [PATCH 5/7] adjust cortex_alertmanager_alerts_received_total to be per user Signed-off-by: Jacob Lisi --- pkg/alertmanager/alertmanager_metrics.go | 8 ++++---- pkg/alertmanager/alertmanager_metrics_test.go | 8 ++++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pkg/alertmanager/alertmanager_metrics.go b/pkg/alertmanager/alertmanager_metrics.go index 723f1355d74..721053dcbb4 100644 --- a/pkg/alertmanager/alertmanager_metrics.go +++ b/pkg/alertmanager/alertmanager_metrics.go @@ -54,11 +54,11 @@ func newAlertmanagerMetrics() *alertmanagerMetrics { alertsReceived: prometheus.NewDesc( "cortex_alertmanager_alerts_received_total", "The total number of received alerts.", - nil, nil), + []string{"user"}, nil), alertsInvalid: prometheus.NewDesc( "cortex_alertmanager_alerts_invalid_total", "The total number of received alerts that were invalid.", - nil, nil), + []string{"user"}, nil), numNotifications: prometheus.NewDesc( "cortex_alertmanager_notifications_total", "The total number of attempted notifications.", @@ -183,8 +183,8 @@ func (m *alertmanagerMetrics) Describe(out chan<- *prometheus.Desc) { func (m *alertmanagerMetrics) Collect(out chan<- prometheus.Metric) { data := util.BuildMetricFamiliesPerUserFromUserRegistries(m.registries()) - data.SendSumOfCounters(out, m.alertsReceived, "alertmanager_alerts_received_total") - data.SendSumOfCounters(out, m.alertsInvalid, "alertmanager_alerts_invalid_total") + data.SendSumOfCountersPerUser(out, m.alertsReceived, "alertmanager_alerts_received_total") + data.SendSumOfCountersPerUser(out, m.alertsInvalid, "alertmanager_alerts_invalid_total") data.SendSumOfCountersPerUser(out, m.numNotifications, "alertmanager_notifications_total") data.SendSumOfCountersPerUser(out, m.numFailedNotifications, "alertmanager_notifications_failed_total") diff --git a/pkg/alertmanager/alertmanager_metrics_test.go b/pkg/alertmanager/alertmanager_metrics_test.go index bd893c646ba..6a8c2b6ba0c 100644 --- a/pkg/alertmanager/alertmanager_metrics_test.go +++ b/pkg/alertmanager/alertmanager_metrics_test.go @@ -43,10 +43,14 @@ func TestAlertmanagerMetricsStore(t *testing.T) { 
cortex_alertmanager_alerts{state="suppressed",user="user3"} 200 # HELP cortex_alertmanager_alerts_invalid_total The total number of received alerts that were invalid. # TYPE cortex_alertmanager_alerts_invalid_total counter - cortex_alertmanager_alerts_invalid_total 222 + cortex_alertmanager_alerts_invalid_total{user="user1"} 2 + cortex_alertmanager_alerts_invalid_total{user="user2"} 20 + cortex_alertmanager_alerts_invalid_total{user="user3"} 200 # HELP cortex_alertmanager_alerts_received_total The total number of received alerts. # TYPE cortex_alertmanager_alerts_received_total counter - cortex_alertmanager_alerts_received_total 1110 + cortex_alertmanager_alerts_received_total{user="user1"} 10 + cortex_alertmanager_alerts_received_total{user="user2"} 100 + cortex_alertmanager_alerts_received_total{user="user3"} 1000 # HELP cortex_alertmanager_nflog_gc_duration_seconds Duration of the last notification log garbage collection cycle. # TYPE cortex_alertmanager_nflog_gc_duration_seconds summary cortex_alertmanager_nflog_gc_duration_seconds_sum 111 From f50885f47700030471d19d23d19c9fe86177dc6f Mon Sep 17 00:00:00 2001 From: Jacob Lisi Date: Mon, 2 Mar 2020 09:56:01 -0500 Subject: [PATCH 6/7] make isActive public function and fix typo Signed-off-by: Jacob Lisi --- pkg/alertmanager/alertmanager.go | 2 +- pkg/alertmanager/multitenant.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/alertmanager/alertmanager.go b/pkg/alertmanager/alertmanager.go index 3851a3afcb6..89e845ea130 100644 --- a/pkg/alertmanager/alertmanager.go +++ b/pkg/alertmanager/alertmanager.go @@ -244,7 +244,7 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error { return nil } -func (am *Alertmanager) isActive() bool { +func (am *Alertmanager) IsActive() bool { am.activeMtx.Lock() defer am.activeMtx.Unlock() return am.active diff --git a/pkg/alertmanager/multitenant.go b/pkg/alertmanager/multitenant.go index e26073541a4..3e2d3aecd9c 100644 --- a/pkg/alertmanager/multitenant.go +++ b/pkg/alertmanager/multitenant.go @@ -308,7 +308,7 @@ func (am *MultitenantAlertmanager) syncConfigs(cfgs map[string]alerts.AlertConfi for user, userAM := range am.alertmanagers { if _, exists := cfgs[user]; !exists { // The user alertmanager is only paused in order to retain the prometheus metrics - // it has reported to it's registry. If a new config for this user appears, this structure + // it has reported to its registry. If a new config for this user appears, this structure // will be reused. 
level.Info(am.logger).Log("msg", "deactivating per-tenant alertmanager", "user", user) userAM.Pause() @@ -466,7 +466,7 @@ func (am *MultitenantAlertmanager) ServeHTTP(w http.ResponseWriter, req *http.Re userAM, ok := am.alertmanagers[userID] am.alertmanagersMtx.Unlock() - if !ok || !userAM.isActive() { + if !ok || !userAM.IsActive() { http.Error(w, fmt.Sprintf("no Alertmanager for this user ID"), http.StatusNotFound) return } From cabeb79e29ab90e3e933ea704381181c3767a0e2 Mon Sep 17 00:00:00 2001 From: Jacob Lisi Date: Mon, 2 Mar 2020 09:56:54 -0500 Subject: [PATCH 7/7] comment IsActive Signed-off-by: Jacob Lisi --- pkg/alertmanager/alertmanager.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/alertmanager/alertmanager.go b/pkg/alertmanager/alertmanager.go index 89e845ea130..0d7cce998e4 100644 --- a/pkg/alertmanager/alertmanager.go +++ b/pkg/alertmanager/alertmanager.go @@ -244,6 +244,8 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error { return nil } +// IsActive returns true if the alertmanager is currently running, +// or false if it has been paused func (am *Alertmanager) IsActive() bool { am.activeMtx.Lock() defer am.activeMtx.Unlock()
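Note on the aggregation pattern used by this series: alertmanagerMetrics.Collect relies on Cortex's util.BuildMetricFamiliesPerUserFromUserRegistries and the SendSumOf* helpers to turn each tenant's private registry into cortex_alertmanager_* series. The sketch below shows the same idea in miniature using only the public prometheus client_golang API; the userCollector type and the hard-coded metric name are illustrative assumptions, not the actual Cortex implementation.

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

// userCollector is a hypothetical, simplified stand-in for alertmanagerMetrics:
// it keeps one registry per tenant and re-exports a single counter with a
// "user" label by gathering each tenant registry on every scrape.
type userCollector struct {
	regs map[string]*prometheus.Registry
	desc *prometheus.Desc
}

func (c *userCollector) Describe(out chan<- *prometheus.Desc) {
	out <- c.desc
}

func (c *userCollector) Collect(out chan<- prometheus.Metric) {
	for user, reg := range c.regs {
		mfs, err := reg.Gather()
		if err != nil {
			continue // the real code aggregates via util.BuildMetricFamiliesPerUserFromUserRegistries
		}
		for _, mf := range mfs {
			if mf.GetName() != "alertmanager_notifications_total" {
				continue
			}
			// Sum every series of the tenant's counter (e.g. one per integration)
			// before re-exporting it as a single per-user value.
			sum := 0.0
			for _, m := range mf.GetMetric() {
				sum += m.GetCounter().GetValue()
			}
			out <- prometheus.MustNewConstMetric(c.desc, prometheus.CounterValue, sum, user)
		}
	}
}

func main() {
	// Per-tenant registry, analogous to the one newAlertmanager() hands to New().
	userReg := prometheus.NewRegistry()
	notifications := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "alertmanager_notifications_total",
		Help: "The total number of attempted notifications.",
	}, []string{"integration"})
	userReg.MustRegister(notifications)
	notifications.WithLabelValues("slack").Add(3)
	notifications.WithLabelValues("email").Add(2)

	// Main registry, analogous to the registerer passed to NewMultitenantAlertmanager().
	agg := &userCollector{
		regs: map[string]*prometheus.Registry{"user1": userReg},
		desc: prometheus.NewDesc(
			"cortex_alertmanager_notifications_total",
			"The total number of attempted notifications.",
			[]string{"user"}, nil),
	}
	mainReg := prometheus.NewRegistry()
	mainReg.MustRegister(agg)

	mfs, err := mainReg.Gather()
	if err != nil {
		panic(err)
	}
	for _, mf := range mfs {
		for _, m := range mf.GetMetric() {
			// Prints one cortex_alertmanager_notifications_total series with user="user1" and value 5.
			fmt.Printf("%s user=%s value=%v\n", mf.GetName(), m.GetLabel()[0].GetValue(), m.GetCounter().GetValue())
		}
	}
}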