From f04ee2fa5faa3708451dd71e9a3e6a913707d6af Mon Sep 17 00:00:00 2001 From: Yao Wu Date: Fri, 6 Jul 2018 11:46:27 -0700 Subject: [PATCH 1/9] add activator metrics --- cmd/activator/main.go | 47 ++- config/activator.yaml | 2 +- .../300-prometheus/100-scrape-config.yaml | 27 ++ pkg/activator/activator.go | 7 +- pkg/activator/dedupe.go | 8 +- pkg/activator/dedupe_test.go | 54 ++-- pkg/activator/revision.go | 21 +- pkg/activator/revision_test.go | 35 ++- pkg/activator/stats_reporter.go | 267 ++++++++++++++++++ pkg/activator/stats_reporter_test.go | 76 +++++ pkg/controller/names.go | 4 + pkg/controller/route/istio/virtual_service.go | 1 + .../route/istio/virtual_service_test.go | 14 +- pkg/controller/route/route_test.go | 12 +- 14 files changed, 503 insertions(+), 72 deletions(-) create mode 100644 pkg/activator/stats_reporter.go create mode 100644 pkg/activator/stats_reporter_test.go diff --git a/cmd/activator/main.go b/cmd/activator/main.go index d80547c854e9..efe58a486e48 100644 --- a/cmd/activator/main.go +++ b/cmd/activator/main.go @@ -20,6 +20,7 @@ import ( "net/http" "net/http/httputil" "net/url" + "strconv" "time" "github.com/knative/serving/pkg/activator" @@ -29,7 +30,8 @@ import ( h2cutil "github.com/knative/serving/pkg/h2c" "github.com/knative/serving/pkg/logging" "github.com/knative/serving/pkg/signals" - "github.com/knative/serving/third_party/h2c" + "go.opencensus.io/exporter/prometheus" + "go.opencensus.io/stats/view" "go.uber.org/zap" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" @@ -41,15 +43,17 @@ const ( ) type activationHandler struct { - act activator.Activator - logger *zap.SugaredLogger + act activator.Activator + logger *zap.SugaredLogger + reporter activator.StatsReporter } // retryRoundTripper retries on 503's for up to 60 seconds. The reason is there is // a small delay for k8s to include the ready IP in service. // https://github.com/knative/serving/issues/660#issuecomment-384062553 type retryRoundTripper struct { - logger *zap.SugaredLogger + logger *zap.SugaredLogger + reporter activator.StatsReporter } func (rrt retryRoundTripper) RoundTrip(r *http.Request) (*http.Response, error) { @@ -74,13 +78,16 @@ func (rrt retryRoundTripper) RoundTrip(r *http.Request) (*http.Response, error) } // TODO: add metrics for number of tries and the response code. rrt.logger.Infof("It took %d tries to get response code %d", i, resp.StatusCode) + rrt.reporter.ReportResponse("default", "configuration-example", "configuration-example-00001", strconv.Itoa(resp.StatusCode), activator.ResponseCountM, 1.0) return resp, nil } func (a *activationHandler) handler(w http.ResponseWriter, r *http.Request) { namespace := r.Header.Get(controller.GetRevisionHeaderNamespace()) name := r.Header.Get(controller.GetRevisionHeaderName()) - endpoint, status, err := a.act.ActiveEndpoint(namespace, name) + config := r.Header.Get(controller.GetConfigurationHeader()) + a.logger.Info("config: ", config) + endpoint, status, err := a.act.ActiveEndpoint(namespace, config, name) if err != nil { msg := fmt.Sprintf("Error getting active endpoint: %v", err) a.logger.Errorf(msg) @@ -93,7 +100,8 @@ func (a *activationHandler) handler(w http.ResponseWriter, r *http.Request) { } proxy := httputil.NewSingleHostReverseProxy(target) proxy.Transport = retryRoundTripper{ - logger: a.logger, + logger: a.logger, + reporter: a.reporter, } // TODO: Clear the host to avoid 404's. @@ -127,9 +135,22 @@ func main() { logger.Fatal("Error building serving clientset: %v", zap.Error(err)) } - a := activator.NewRevisionActivator(kubeClient, servingClient, logger) + logger.Info("Initializing OpenCensus Prometheus exporter.") + promExporter, err := prometheus.NewExporter(prometheus.Options{Namespace: "activator"}) + if err != nil { + logger.Fatal("Failed to create the Prometheus exporter: %v", zap.Error(err)) + } + view.RegisterExporter(promExporter) + view.SetReportingPeriod(10 * time.Second) + + reporter, err := activator.NewStatsReporter() + if err != nil { + logger.Fatal("Failed to create stats reporter: %v", zap.Error(err)) + } + + a := activator.NewRevisionActivator(kubeClient, servingClient, logger, reporter) a = activator.NewDedupingActivator(a) - ah := &activationHandler{a, logger} + ah := &activationHandler{a, logger, reporter} // set up signals so we handle the first shutdown signal gracefully stopCh := signals.SetupSignalHandler() @@ -138,6 +159,12 @@ func main() { a.Shutdown() }() - http.HandleFunc("/", ah.handler) - h2c.ListenAndServe(":8080", nil) + // http.HandleFunc("/", ah.handler) + // h2c.ListenAndServe(":8080", nil) + + // Start the endpoint for Prometheus scraping + mux := http.NewServeMux() + mux.HandleFunc("/", ah.handler) + mux.Handle("/metrics", promExporter) + http.ListenAndServe(":8080", mux) } diff --git a/config/activator.yaml b/config/activator.yaml index cfda00c92304..ea552c832849 100644 --- a/config/activator.yaml +++ b/config/activator.yaml @@ -36,7 +36,7 @@ spec: # and substituted here. image: github.com/knative/serving/cmd/activator ports: - - name: http + - name: activator-port containerPort: 8080 args: - "-logtostderr=true" diff --git a/config/monitoring/200-common/300-prometheus/100-scrape-config.yaml b/config/monitoring/200-common/300-prometheus/100-scrape-config.yaml index ba2ee7f1e5ac..7034a38f15e7 100644 --- a/config/monitoring/200-common/300-prometheus/100-scrape-config.yaml +++ b/config/monitoring/200-common/300-prometheus/100-scrape-config.yaml @@ -56,6 +56,33 @@ data: regex: (.*) target_label: service replacement: $1 + # Activator pods + - job_name: activator + scrape_interval: 3s + scrape_timeout: 3s + kubernetes_sd_configs: + - role: pod + relabel_configs: + # Scrape only the the targets matching the following metadata + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_app, __meta_kubernetes_pod_container_port_name] + action: keep + regex: knative-serving;activator;activator-port + # Rename metadata labels to be reader friendly + - source_labels: [__meta_kubernetes_namespace] + action: replace + regex: (.*) + target_label: namespace + replacement: $1 + - source_labels: [__meta_kubernetes_pod_name] + action: replace + regex: (.*) + target_label: pod + replacement: $1 + - source_labels: [__meta_kubernetes_service_name] + action: replace + regex: (.*) + target_label: service + replacement: $1 # Fluentd daemonset - job_name: fluentd-ds kubernetes_sd_configs: diff --git a/pkg/activator/activator.go b/pkg/activator/activator.go index 9321e224d258..3e3b70db1dde 100644 --- a/pkg/activator/activator.go +++ b/pkg/activator/activator.go @@ -21,13 +21,14 @@ type Status int // Activator provides an active endpoint for a revision or an error and // status code indicating why it could not. type Activator interface { - ActiveEndpoint(namespace, name string) (Endpoint, Status, error) + ActiveEndpoint(namespace, configuration, name string) (Endpoint, Status, error) Shutdown() } type revisionID struct { - namespace string - name string + namespace string + configuration string + name string } // Endpoint is a fully-qualified domain name / port pair for an active revision. diff --git a/pkg/activator/dedupe.go b/pkg/activator/dedupe.go index 66f0e2ccd858..3ce2ebc1daa4 100644 --- a/pkg/activator/dedupe.go +++ b/pkg/activator/dedupe.go @@ -50,8 +50,10 @@ func NewDedupingActivator(a Activator) Activator { } } -func (a *dedupingActivator) ActiveEndpoint(namespace, name string) (Endpoint, Status, error) { - id := revisionID{namespace: namespace, name: name} +func (a *dedupingActivator) ActiveEndpoint(namespace, configuration, name string) (Endpoint, Status, error) { + id := revisionID{namespace: namespace, + configuration: configuration, + name: name} ch := make(chan activationResult, 1) a.dedupe(id, ch) result := <-ch @@ -86,7 +88,7 @@ func (a *dedupingActivator) dedupe(id revisionID, ch chan activationResult) { } func (a *dedupingActivator) activate(id revisionID) { - endpoint, status, err := a.activator.ActiveEndpoint(id.namespace, id.name) + endpoint, status, err := a.activator.ActiveEndpoint(id.namespace, id.configuration, id.name) a.mux.Lock() defer a.mux.Unlock() result := activationResult{ diff --git a/pkg/activator/dedupe_test.go b/pkg/activator/dedupe_test.go index 2760637b7c16..f03359fe4abe 100644 --- a/pkg/activator/dedupe_test.go +++ b/pkg/activator/dedupe_test.go @@ -28,7 +28,7 @@ func TestSingleRevision_SingleRequest_Success(t *testing.T) { want := Endpoint{"ip", 8080} f := newFakeActivator(t, map[revisionID]activationResult{ - revisionID{"default", "rev1"}: activationResult{ + revisionID{"default", "config", "rev1"}: activationResult{ endpoint: want, status: Status(0), err: nil, @@ -36,7 +36,7 @@ func TestSingleRevision_SingleRequest_Success(t *testing.T) { }) d := NewDedupingActivator(Activator(f)) - endpoint, status, err := d.ActiveEndpoint("default", "rev1") + endpoint, status, err := d.ActiveEndpoint("default", "config", "rev1") if err != nil { t.Errorf("Unexpected error: %v", err) @@ -56,7 +56,7 @@ func TestSingleRevision_MultipleRequests_Success(t *testing.T) { ep := Endpoint{"ip", 8080} f := newFakeActivator(t, map[revisionID]activationResult{ - revisionID{"default", "rev1"}: activationResult{ + revisionID{"default", "config", "rev1"}: activationResult{ endpoint: ep, status: Status(0), err: nil, @@ -65,8 +65,8 @@ func TestSingleRevision_MultipleRequests_Success(t *testing.T) { d := NewDedupingActivator(f) got := concurrentTest(d, f, []revisionID{ - revisionID{"default", "rev1"}, - revisionID{"default", "rev1"}, + revisionID{"default", "config", "rev1"}, + revisionID{"default", "config", "rev1"}, }) want := []activationResult{ @@ -86,12 +86,12 @@ func TestMultipleRevisions_MultipleRequests_Success(t *testing.T) { ep2 := Endpoint{"ip2", 8080} f := newFakeActivator(t, map[revisionID]activationResult{ - revisionID{"default", "rev1"}: activationResult{ + revisionID{"default", "config", "rev1"}: activationResult{ endpoint: ep1, status: Status(0), err: nil, }, - revisionID{"default", "rev2"}: activationResult{ + revisionID{"default", "config", "rev2"}: activationResult{ endpoint: ep2, status: Status(0), err: nil, @@ -100,10 +100,10 @@ func TestMultipleRevisions_MultipleRequests_Success(t *testing.T) { d := NewDedupingActivator(f) got := concurrentTest(d, f, []revisionID{ - revisionID{"default", "rev1"}, - revisionID{"default", "rev2"}, - revisionID{"default", "rev1"}, - revisionID{"default", "rev2"}, + revisionID{"default", "config", "rev1"}, + revisionID{"default", "config", "rev2"}, + revisionID{"default", "config", "rev1"}, + revisionID{"default", "config", "rev2"}, }) want := []activationResult{ @@ -126,12 +126,12 @@ func TestMultipleRevisions_MultipleRequests_PartialSuccess(t *testing.T) { error2 := fmt.Errorf("test error") f := newFakeActivator(t, map[revisionID]activationResult{ - revisionID{"default", "rev1"}: activationResult{ + revisionID{"default", "config", "rev1"}: activationResult{ endpoint: ep1, status: Status(0), err: nil, }, - revisionID{"default", "rev2"}: activationResult{ + revisionID{"default", "config", "rev2"}: activationResult{ endpoint: Endpoint{}, status: status2, err: error2, @@ -140,10 +140,10 @@ func TestMultipleRevisions_MultipleRequests_PartialSuccess(t *testing.T) { d := NewDedupingActivator(f) got := concurrentTest(d, f, []revisionID{ - revisionID{"default", "rev1"}, - revisionID{"default", "rev2"}, - revisionID{"default", "rev1"}, - revisionID{"default", "rev2"}, + revisionID{"default", "config", "rev1"}, + revisionID{"default", "config", "rev2"}, + revisionID{"default", "config", "rev1"}, + revisionID{"default", "config", "rev2"}, }) want := []activationResult{ @@ -166,7 +166,7 @@ func TestSingleRevision_MultipleRequests_FailureRecovery(t *testing.T) { failErr := fmt.Errorf("test error") f := newFakeActivator(t, map[revisionID]activationResult{ - revisionID{"default", "rev1"}: activationResult{ + revisionID{"default", "config", "rev1"}: activationResult{ endpoint: failEp, status: failStatus, err: failErr, @@ -175,7 +175,7 @@ func TestSingleRevision_MultipleRequests_FailureRecovery(t *testing.T) { d := NewDedupingActivator(Activator(f)) // Activation initially fails - endpoint, status, err := d.ActiveEndpoint("default", "rev1") + endpoint, status, err := d.ActiveEndpoint("default", "config", "rev1") if err != failErr { t.Errorf("Unexpected error. Want %v. Got %v.", failErr, err) @@ -193,13 +193,13 @@ func TestSingleRevision_MultipleRequests_FailureRecovery(t *testing.T) { // Later activation succeeds successEp := Endpoint{"ip", 8080} successStatus := Status(0) - f.responses[revisionID{"default", "rev1"}] = activationResult{ + f.responses[revisionID{"default", "config", "rev1"}] = activationResult{ endpoint: successEp, status: successStatus, err: nil, } - endpoint, status, err = d.ActiveEndpoint("default", "rev1") + endpoint, status, err = d.ActiveEndpoint("default", "config", "rev1") if err != nil { t.Errorf("Unexpected error. Want %v. Got %v.", nil, err) @@ -219,20 +219,20 @@ func TestShutdown_ReturnError(t *testing.T) { ep := Endpoint{"ip", 8080} f := newFakeActivator(t, map[revisionID]activationResult{ - revisionID{"default", "rev1"}: activationResult{ + revisionID{"default", "config", "rev1"}: activationResult{ endpoint: ep, status: Status(0), err: nil, }, }) d := NewDedupingActivator(Activator(f)) - f.hold(revisionID{"default", "rev1"}) + f.hold(revisionID{"default", "config", "rev1"}) go func() { time.Sleep(100 * time.Millisecond) d.Shutdown() }() - endpoint, status, err := d.ActiveEndpoint("default", "rev1") + endpoint, status, err := d.ActiveEndpoint("default", "config", "rev1") want := Endpoint{} if endpoint != want { @@ -264,8 +264,8 @@ func newFakeActivator(t *testing.T, responses map[revisionID]activationResult) * } } -func (f *fakeActivator) ActiveEndpoint(namespace, name string) (Endpoint, Status, error) { - id := revisionID{namespace, name} +func (f *fakeActivator) ActiveEndpoint(namespace, configuration, name string) (Endpoint, Status, error) { + id := revisionID{namespace, configuration, name} f.recordMutex.Lock() f.record = append(f.record, id) @@ -314,7 +314,7 @@ func concurrentTest(a Activator, f *fakeActivator, ids []revisionID) []activatio end.Add(1) go func(index int, id revisionID) { start.Done() - endpoint, status, err := a.ActiveEndpoint(id.namespace, id.name) + endpoint, status, err := a.ActiveEndpoint(id.namespace, id.configuration, id.name) results[index] = activationResult{endpoint, status, err} end.Done() }(i, id) diff --git a/pkg/activator/revision.go b/pkg/activator/revision.go index 172ed00dc72f..b37a8989f81f 100644 --- a/pkg/activator/revision.go +++ b/pkg/activator/revision.go @@ -36,17 +36,19 @@ type revisionActivator struct { kubeClient kubernetes.Interface knaClient clientset.Interface logger *zap.SugaredLogger + reporter StatsReporter } // NewRevisionActivator creates an Activator that changes revision // serving status to active if necessary, then returns the endpoint // once the revision is ready to serve traffic. -func NewRevisionActivator(kubeClient kubernetes.Interface, servingClient clientset.Interface, logger *zap.SugaredLogger) Activator { +func NewRevisionActivator(kubeClient kubernetes.Interface, servingClient clientset.Interface, logger *zap.SugaredLogger, reporter StatsReporter) Activator { return &revisionActivator{ readyTimout: 60 * time.Second, kubeClient: kubeClient, knaClient: servingClient, logger: logger, + reporter: reporter, } } @@ -54,12 +56,14 @@ func (r *revisionActivator) Shutdown() { // nothing to do } -func (r *revisionActivator) ActiveEndpoint(namespace, name string) (end Endpoint, status Status, activationError error) { +func (r *revisionActivator) ActiveEndpoint(namespace, configuration, name string) (end Endpoint, status Status, activationError error) { logger := loggerWithRevisionInfo(r.logger, namespace, name) - rev := revisionID{namespace: namespace, name: name} + rev := revisionID{namespace: namespace, + configuration: configuration, + name: name} internalError := func(msg string, args ...interface{}) (Endpoint, Status, error) { - logger.Infof(msg, args...) + logger.Errorf(msg, args...) return Endpoint{}, http.StatusInternalServerError, fmt.Errorf(fmt.Sprintf("%s for namespace: %s, revision name: %s ", msg, namespace, name), args...) } @@ -69,14 +73,23 @@ func (r *revisionActivator) ActiveEndpoint(namespace, name string) (end Endpoint if err != nil { return internalError("Unable to get revision: %v", err) } + switch revision.Spec.ServingState { default: + //r.reporter.Report(namespace, configuration, name, RequestCountUnknownM, 1.0) + r.reporter.ReportRequest(namespace, configuration, name, "Unknown", RequestCountM, 1.0) return internalError("Disregarding activation request for revision in unknown state %v", revision.Spec.ServingState) case v1alpha1.RevisionServingStateRetired: + //r.reporter.Report(namespace, configuration, name, RequestCountRetiredM, 1.0) + r.reporter.ReportRequest(namespace, configuration, name, "Retired", RequestCountM, 1.0) return internalError("Disregarding activation request for retired revision ") case v1alpha1.RevisionServingStateActive: // Revision is already active. Nothing to do + //r.reporter.Report(namespace, configuration, name, RequestCountActiveM, 1.0) + r.reporter.ReportRequest(namespace, configuration, name, "Active", RequestCountM, 1.0) case v1alpha1.RevisionServingStateReserve: + //r.reporter.Report(namespace, configuration, name, RequestCountReserveM, 1.0) + r.reporter.ReportRequest(namespace, configuration, name, "Reserve", RequestCountM, 1.0) // Activate the revision revision.Spec.ServingState = v1alpha1.RevisionServingStateActive if _, err := revisionClient.Update(revision); err != nil { diff --git a/pkg/activator/revision_test.go b/pkg/activator/revision_test.go index 07c93d0183de..a33cc38f6190 100644 --- a/pkg/activator/revision_test.go +++ b/pkg/activator/revision_test.go @@ -31,19 +31,26 @@ import ( ) const ( - testNamespace = "test-namespace" - testRevision = "test-rev" - testService = testRevision + "-service" - testServiceFQDN = testService + "." + testNamespace + ".svc.cluster.local" + testNamespace = "test-namespace" + testConfiguration = "test-configuration" + testRevision = "test-rev" + testService = testRevision + "-service" + testServiceFQDN = testService + "." + testNamespace + ".svc.cluster.local" ) +type mockReporter struct{} + +func (r *mockReporter) Report(ns string, config string, rev string, m Measurement, v float64) error { + return nil +} + func TestActiveEndpoint_Active_StaysActive(t *testing.T) { k8s, kna := fakeClients() kna.ServingV1alpha1().Revisions(testNamespace).Create(newRevisionBuilder().build()) k8s.CoreV1().Services(testNamespace).Create(newServiceBuilder().build()) - a := NewRevisionActivator(k8s, kna, TestLogger(t)) + a := NewRevisionActivator(k8s, kna, TestLogger(t), &mockReporter{}) - got, status, err := a.ActiveEndpoint(testNamespace, testRevision) + got, status, err := a.ActiveEndpoint(testNamespace, testConfiguration, testRevision) want := Endpoint{testServiceFQDN, 8080} if got != want { @@ -64,9 +71,9 @@ func TestActiveEndpoint_Reserve_BecomesActive(t *testing.T) { withServingState(v1alpha1.RevisionServingStateReserve). build()) k8s.CoreV1().Services(testNamespace).Create(newServiceBuilder().build()) - a := NewRevisionActivator(k8s, kna, TestLogger(t)) + a := NewRevisionActivator(k8s, kna, TestLogger(t), &mockReporter{}) - got, status, err := a.ActiveEndpoint(testNamespace, testRevision) + got, status, err := a.ActiveEndpoint(testNamespace, testConfiguration, testRevision) want := Endpoint{testServiceFQDN, 8080} if got != want { @@ -92,9 +99,9 @@ func TestActiveEndpoint_Retired_StaysRetiredWithError(t *testing.T) { withServingState(v1alpha1.RevisionServingStateRetired). build()) k8s.CoreV1().Services(testNamespace).Create(newServiceBuilder().build()) - a := NewRevisionActivator(k8s, kna, TestLogger(t)) + a := NewRevisionActivator(k8s, kna, TestLogger(t), &mockReporter{}) - got, status, err := a.ActiveEndpoint(testNamespace, testRevision) + got, status, err := a.ActiveEndpoint(testNamespace, testConfiguration, testRevision) want := Endpoint{} if got != want { @@ -121,11 +128,11 @@ func TestActiveEndpoint_Reserve_WaitsForReady(t *testing.T) { withReady(false). build()) k8s.CoreV1().Services(testNamespace).Create(newServiceBuilder().build()) - a := NewRevisionActivator(k8s, kna, TestLogger(t)) + a := NewRevisionActivator(k8s, kna, TestLogger(t), &mockReporter{}) ch := make(chan activationResult) go func() { - endpoint, status, err := a.ActiveEndpoint(testNamespace, testRevision) + endpoint, status, err := a.ActiveEndpoint(testNamespace, testConfiguration, testRevision) ch <- activationResult{endpoint, status, err} }() @@ -167,12 +174,12 @@ func TestActiveEndpoint_Reserve_ReadyTimeoutWithError(t *testing.T) { withReady(false). build()) k8s.CoreV1().Services(testNamespace).Create(newServiceBuilder().build()) - a := NewRevisionActivator(k8s, kna, TestLogger(t)) + a := NewRevisionActivator(k8s, kna, TestLogger(t), &mockReporter{}) a.(*revisionActivator).readyTimout = 200 * time.Millisecond ch := make(chan activationResult) go func() { - endpoint, status, err := a.ActiveEndpoint(testNamespace, testRevision) + endpoint, status, err := a.ActiveEndpoint(testNamespace, testConfiguration, testRevision) ch <- activationResult{endpoint, status, err} }() diff --git a/pkg/activator/stats_reporter.go b/pkg/activator/stats_reporter.go new file mode 100644 index 000000000000..ed3ac6e20b9b --- /dev/null +++ b/pkg/activator/stats_reporter.go @@ -0,0 +1,267 @@ +/* +Copyright 2018 Google Inc. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package activator + +import ( + "context" + "errors" + + "go.opencensus.io/stats" + "go.opencensus.io/stats/view" + "go.opencensus.io/tag" +) + +// Measurement represents the type of the autoscaler metric to be reported +type Measurement int + +const ( + // // RequestCountReserveM is the requests count that are routed to the activator when + // // the revision is Reserve + // RequestCountReserveM Measurement = iota + // // RequestCountActiveM is the requests count that are routed to the activator when + // // the revision is Active + // RequestCountActiveM + // // RequestCountRetiredM is the requests count that are routed to the activator when + // // the revision is Retired + // RequestCountRetiredM + // // RequestCountUnknownM is the requests count that are routed to the activator when + // // the revision is not Active, Reserve, and Retired + // RequestCountUnknownM + + // RequestCountM is the requests count that are routed to the activator + RequestCountM Measurement = iota + + //ResponseCountM is the response count when activator proxy the request + ResponseCountM + // ResponseCodeM is the response code when activator proxy the request + // ResponseCodeM + + // NumTriesM is the number of tries to get the response code + NumTriesM + // ResponseTimeInSecM is the response time in seconds + ResponseTimeInSecM +) + +var ( + measurements = []*stats.Float64Measure{ + // RequestCountReserveM: stats.Float64( + // "request_count_reserve", + // "The number of requests that are routed to the activator when the revision is Reserve", + // stats.UnitNone), + // RequestCountActiveM: stats.Float64( + // "request_count_active", + // "The number of requests that are routed to the activator when the revision is Active", + // stats.UnitNone), + // RequestCountRetiredM: stats.Float64( + // "request_count_retired", + // "The number of requests that are routed to the activator when the revision is Retired", + // stats.UnitNone), + // RequestCountUnknownM: stats.Float64( + // "request_count_unknown", + // "The number of requests that are routed to the activator when the revision is not Active, Reserve, and Retired", + // stats.UnitNone), + RequestCountM: stats.Float64( + "revision_request_count", + "The number of requests that are routed to the activator", + stats.UnitNone), + ResponseCountM: stats.Float64( + "response_count", + "The response count when activator proxy the request", + stats.UnitNone), + // ResponseCodeM: stats.Float64( + // "response_code", + // "The response code when activator proxy the request", + // stats.UnitNone), + NumTriesM: stats.Float64( + "num_tries", + "The number of tries to get the response", + stats.UnitNone), + ResponseTimeInSecM: stats.Float64( + "response_time_seconds", + "The response time in seconds", + stats.UnitNone), + } +) + +// StatsReporter defines the interface for sending activator metrics +type StatsReporter interface { + Report(ns string, config string, rev string, m Measurement, v float64) error + ReportRequest(ns, config, rev, servingState string, m Measurement, v float64) error + ReportResponse(ns, config, rev, responseCode string, m Measurement, v float64) error +} + +// Reporter holds cached metric objects to report autoscaler metrics +type Reporter struct { + initialized bool + namespaceTagKey tag.Key + configTagKey tag.Key + revisionTagKey tag.Key + servingStateKey tag.Key + responseCodeKey tag.Key +} + +// NewStatsReporter creates a reporter that collects and reports activator metrics +func NewStatsReporter() (*Reporter, error) { + + var r = &Reporter{} + + // Create the tag keys that will be used to add tags to our measurements. + nsTag, err := tag.NewKey("destination_namespace") + if err != nil { + return nil, err + } + r.namespaceTagKey = nsTag + configTag, err := tag.NewKey("destination_configuration") + if err != nil { + return nil, err + } + r.configTagKey = configTag + revTag, err := tag.NewKey("destination_revision") + if err != nil { + return nil, err + } + r.revisionTagKey = revTag + servingStateTag, err := tag.NewKey("servingState") + if err != nil { + return nil, err + } + r.servingStateKey = servingStateTag + responseCodeTag, err := tag.NewKey("responseCode") + if err != nil { + return nil, err + } + r.responseCodeKey = responseCodeTag + // Create view to see our measurements. + err = view.Register( + // &view.View{ + // Description: "The number of requests that are routed to the activator when the revision is Reserve", + // Measure: measurements[RequestCountReserveM], + // Aggregation: view.Count(), + // TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, + // }, + // &view.View{ + // Description: "The number of requests that are routed to the activator when the revision is Active", + // Measure: measurements[RequestCountActiveM], + // Aggregation: view.Count(), + // TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, + // }, + // &view.View{ + // Description: "The number of requests that are routed to the activator when the revision is Retired", + // Measure: measurements[RequestCountRetiredM], + // Aggregation: view.Count(), + // TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, + // }, + // &view.View{ + // Description: "The number of requests that are routed to the activator when the revision is not Active, Reserve, and Retired", + // Measure: measurements[RequestCountUnknownM], + // Aggregation: view.Count(), + // TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, + // }, + &view.View{ + Description: "The number of requests that are routed to the activator", + Measure: measurements[RequestCountM], + Aggregation: view.Count(), + TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.servingStateKey}, + }, + &view.View{ + Description: "The response count when activator proxy the request", + Measure: measurements[ResponseCountM], + Aggregation: view.Count(), + TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.responseCodeKey}, + }, + // &view.View{ + // Description: "The response code when activator proxy the request", + // Measure: measurements[ResponseCodeM], + // Aggregation: view.Distribution(), + // TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.responseCodeKey}, + // }, + &view.View{ + Description: "The number of tries to get the response", + Measure: measurements[NumTriesM], + Aggregation: view.Count(), + TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, + }, + &view.View{ + Description: "The response time in seconds", + Measure: measurements[ResponseTimeInSecM], + Aggregation: view.Distribution(), + TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, + }, + ) + if err != nil { + return nil, err + } + + r.initialized = true + return r, nil +} + +// Report captures value v for measurement m. The revision rev is in namespace ns and its owner is config +func (r *Reporter) Report(ns string, config string, rev string, m Measurement, v float64) error { + if !r.initialized { + return errors.New("StatsReporter is not initialized yet") + } + + ctx, err := tag.New( + context.Background(), + tag.Insert(r.namespaceTagKey, ns), + tag.Insert(r.configTagKey, config), + tag.Insert(r.revisionTagKey, rev)) + if err != nil { + return err + } + + stats.Record(ctx, measurements[m].M(v)) + return nil +} + +// reportRequest captures value v for measurement m. +func (r *Reporter) ReportRequest(ns, config, rev, servingState string, m Measurement, v float64) error { + if !r.initialized { + return errors.New("StatsReporter is not initialized yet") + } + + ctx, err := tag.New( + context.Background(), + tag.Insert(r.namespaceTagKey, ns), + tag.Insert(r.configTagKey, config), + tag.Insert(r.revisionTagKey, rev), + tag.Insert(r.servingStateKey, servingState)) + if err != nil { + return err + } + + stats.Record(ctx, measurements[m].M(v)) + return nil +} + +// ReportResponse captures value v for measurement m. +func (r *Reporter) ReportResponse(ns, config, rev, responseCode string, m Measurement, v float64) error { + if !r.initialized { + return errors.New("StatsReporter is not initialized yet") + } + + ctx, err := tag.New( + context.Background(), + tag.Insert(r.namespaceTagKey, ns), + tag.Insert(r.configTagKey, config), + tag.Insert(r.revisionTagKey, rev), + tag.Insert(r.responseCodeKey, responseCode)) + if err != nil { + return err + } + + stats.Record(ctx, measurements[m].M(v)) + return nil +} diff --git a/pkg/activator/stats_reporter_test.go b/pkg/activator/stats_reporter_test.go new file mode 100644 index 000000000000..495f22af7722 --- /dev/null +++ b/pkg/activator/stats_reporter_test.go @@ -0,0 +1,76 @@ +/* +Copyright 2018 Google Inc. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package activator + +import ( + "testing" + + "go.opencensus.io/stats/view" +) + +// var expectedType = map[string]struct{}{ +// "LastValueData": view.LastValueData, +// "CountData": view.CountData, +// } + +func TestActivatorReporter_Report(t *testing.T) { + r := &Reporter{} + + if err := r.Report("testNs", "testConfig", "testRev", RequestCountReserveM, 1); err == nil { + t.Error("Reporter.Report() expected an error for Report call before init. Got success.") + } + + r, _ = NewStatsReporter() + wantTags := map[string]string{ + "configuration_namespace": "testns", + "configuration": "testconfig", + "revision": "testrev", + } + expectSuccess(t, func() error { return r.Report("testns", "testconfig", "testrev", RequestCountReserveM, 1) }) + expectSuccess(t, func() error { return r.Report("testns", "testconfig", "testrev", RequestCountReserveM, 1) }) + checkData(t, "request_count_reserve", wantTags, 2) +} + +func expectSuccess(t *testing.T, f func() error) { + if err := f(); err != nil { + t.Errorf("Reporter.Report() expected success but got error %v", err) + } +} + +func checkData(t *testing.T, name string, wantTags map[string]string, wantValue int) { + if d, err := view.RetrieveData(name); err != nil { + t.Errorf("Reporter.Report() error = %v, wantErr %v", err, false) + } else { + if len(d) != 1 { + t.Errorf("Reporter.Report() len(d) %v, want %v", len(d), 1) + } + for _, got := range d[0].Tags { + if want, ok := wantTags[got.Key.Name()]; !ok { + t.Errorf("Reporter.Report() got an extra tag %v: %v", got.Key.Name(), got.Value) + } else { + if got.Value != want { + t.Errorf("Reporter.Report() expected a different tag value. key:%v, got: %v, want: %v", got.Key.Name(), got.Value, want) + } + } + } + + if s, ok := d[0].Data.(*view.CountData); !ok { + t.Error("Reporter.Report() expected a CountData type") + } else { + if s.Value != (int64)(wantValue) { + t.Errorf("Reporter.Report() expected %v got %v. metric: %v", (int64)(wantValue), s.Value, name) + } + } + } +} diff --git a/pkg/controller/names.go b/pkg/controller/names.go index d8dca2ec7a0a..e9440164ba69 100644 --- a/pkg/controller/names.go +++ b/pkg/controller/names.go @@ -96,6 +96,10 @@ func GetRevisionHeaderName() string { return "Knative-Serving-Revision" } +func GetConfigurationHeader() string { + return "Knative-Serving-Configuration" +} + func GetRevisionHeaderNamespace() string { return "Knative-Serving-Namespace" } diff --git a/pkg/controller/route/istio/virtual_service.go b/pkg/controller/route/istio/virtual_service.go index cb6ec0925b18..c46a41a78f43 100644 --- a/pkg/controller/route/istio/virtual_service.go +++ b/pkg/controller/route/istio/virtual_service.go @@ -163,6 +163,7 @@ func addActivatorRoutes(r *v1alpha3.HTTPRoute, ns string, inactive []traffic.Rev }) r.AppendHeaders = map[string]string{ controller.GetRevisionHeaderName(): maxInactiveTarget.RevisionName, + controller.GetConfigurationHeader(): maxInactiveTarget.ConfigurationName, controller.GetRevisionHeaderNamespace(): ns, EnvoyTimeoutHeader: DefaultEnvoyTimeoutMs, } diff --git a/pkg/controller/route/istio/virtual_service_test.go b/pkg/controller/route/istio/virtual_service_test.go index 3f764c141879..f8ed4bc4eabc 100644 --- a/pkg/controller/route/istio/virtual_service_test.go +++ b/pkg/controller/route/istio/virtual_service_test.go @@ -281,9 +281,10 @@ func TestMakeVirtualServiceRoute_VanillaScaledToZero(t *testing.T) { Weight: 100, }}, AppendHeaders: map[string]string{ - "Knative-Serving-Revision": "revision", - "Knative-Serving-Namespace": "test-ns", - EnvoyTimeoutHeader: DefaultEnvoyTimeoutMs, + "Knative-Serving-Revision": "revision", + "Knative-Serving-Configuration": "config", + "Knative-Serving-Namespace": "test-ns", + EnvoyTimeoutHeader: DefaultEnvoyTimeoutMs, }, } if diff := cmp.Diff(&expected, route); diff != "" { @@ -323,9 +324,10 @@ func TestMakeVirtualServiceRoute_TwoInactiveTargets(t *testing.T) { Weight: 100, }}, AppendHeaders: map[string]string{ - "Knative-Serving-Revision": "revision", - "Knative-Serving-Namespace": "test-ns", - EnvoyTimeoutHeader: DefaultEnvoyTimeoutMs, + "Knative-Serving-Revision": "revision", + "Knative-Serving-Configuration": "config", + "Knative-Serving-Namespace": "test-ns", + EnvoyTimeoutHeader: DefaultEnvoyTimeoutMs, }, } if diff := cmp.Diff(&expected, route); diff != "" { diff --git a/pkg/controller/route/route_test.go b/pkg/controller/route/route_test.go index d66b2fb32a8f..b1166b099408 100644 --- a/pkg/controller/route/route_test.go +++ b/pkg/controller/route/route_test.go @@ -339,8 +339,9 @@ func TestCreateRouteForOneReserveRevision(t *testing.T) { // A route targeting the revision route := getTestRouteWithTrafficTargets( []v1alpha1.TrafficTarget{{ - RevisionName: "test-rev", - Percent: 100, + RevisionName: "test-rev", + ConfigurationName: "test-config", + Percent: 100, }}, ) servingClient.ServingV1alpha1().Routes(testNamespace).Create(route) @@ -396,6 +397,7 @@ func TestCreateRouteForOneReserveRevision(t *testing.T) { Route: []v1alpha3.DestinationWeight{getActivatorDestinationWeight(100)}, AppendHeaders: map[string]string{ ctrl.GetRevisionHeaderName(): "test-rev", + ctrl.GetConfigurationHeader(): "test-config", ctrl.GetRevisionHeaderNamespace(): testNamespace, istio.EnvoyTimeoutHeader: istio.DefaultEnvoyTimeoutMs, }, @@ -517,8 +519,9 @@ func TestCreateRouteWithOneTargetReserve(t *testing.T) { ConfigurationName: config.Name, Percent: 90, }, { - RevisionName: rev.Name, - Percent: 10, + RevisionName: rev.Name, + ConfigurationName: "test-config", + Percent: 10, }}, ) servingClient.ServingV1alpha1().Routes(testNamespace).Create(route) @@ -563,6 +566,7 @@ func TestCreateRouteWithOneTargetReserve(t *testing.T) { }, getActivatorDestinationWeight(10)}, AppendHeaders: map[string]string{ ctrl.GetRevisionHeaderName(): "test-rev", + ctrl.GetConfigurationHeader(): "test-config", ctrl.GetRevisionHeaderNamespace(): testNamespace, istio.EnvoyTimeoutHeader: istio.DefaultEnvoyTimeoutMs, }, From 3ee623d34e9050c00b418e04b77683cfbd30d87a Mon Sep 17 00:00:00 2001 From: Yao Wu Date: Mon, 9 Jul 2018 09:43:44 -0700 Subject: [PATCH 2/9] number of tries metric --- cmd/activator/main.go | 17 ++--- .../100-scaling-configmap-dev.yaml | 2 +- pkg/activator/dedupe.go | 32 +++++++- pkg/activator/revision.go | 8 +- pkg/activator/stats_reporter.go | 76 +++++++------------ 5 files changed, 69 insertions(+), 66 deletions(-) diff --git a/cmd/activator/main.go b/cmd/activator/main.go index f03748f007d7..da7fcd2af8dd 100644 --- a/cmd/activator/main.go +++ b/cmd/activator/main.go @@ -23,7 +23,6 @@ import ( "net/http" "net/http/httputil" "net/url" - "strconv" "time" "github.com/knative/serving/pkg/activator" @@ -110,10 +109,13 @@ func (rrt retryRoundTripper) RoundTrip(r *http.Request) (*http.Response, error) resp, err = transport.RoundTrip(r) } - // TODO: add metrics for number of tries and the response code. + if resp != nil { rrt.logger.Infof("It took %d tries to get response code %d", i, resp.StatusCode) - rrt.reporter.ReportResponse("default", "configuration-example", "configuration-example-00001", strconv.Itoa(resp.StatusCode), activator.ResponseCountM, 1.0) + namespace := r.Header.Get(controller.GetRevisionHeaderNamespace()) + name := r.Header.Get(controller.GetRevisionHeaderName()) + config := r.Header.Get(controller.GetConfigurationHeader()) + rrt.reporter.ReportResponse(namespace, config, name, resp.StatusCode, i, 1.0) } return resp, nil } @@ -127,12 +129,12 @@ func (a *activationHandler) handler(w http.ResponseWriter, r *http.Request) { namespace := r.Header.Get(controller.GetRevisionHeaderNamespace()) name := r.Header.Get(controller.GetRevisionHeaderName()) config := r.Header.Get(controller.GetConfigurationHeader()) - a.logger.Info("config: ", config) endpoint, status, err := a.act.ActiveEndpoint(namespace, config, name) if err != nil { msg := fmt.Sprintf("Error getting active endpoint: %v", err) - a.logger.Errorf(msg) http.Error(w, msg, int(status)) + a.logger.Errorf(msg) + a.reporter.ReportResponse(namespace, config, name, int(status), 1, 1.0) return } target := &url.URL{ @@ -190,7 +192,7 @@ func main() { } a := activator.NewRevisionActivator(kubeClient, servingClient, logger, reporter) - a = activator.NewDedupingActivator(a) + a = activator.NewDedupingActivator(a, servingClient, logger, reporter) ah := &activationHandler{a, logger, reporter} // set up signals so we handle the first shutdown signal gracefully @@ -200,9 +202,6 @@ func main() { a.Shutdown() }() - // http.HandleFunc("/", ah.handler) - // h2c.ListenAndServe(":8080", nil) - // Start the endpoint for Prometheus scraping mux := http.NewServeMux() mux.HandleFunc("/", ah.handler) diff --git a/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml b/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml index a601e96cdeea..8bf0972fd02f 100644 --- a/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml +++ b/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml @@ -855,5 +855,5 @@ data: "timezone": "", "title": "Knative Serving - Scaling Debugging", "uid": "u_-9SIMiz", - "version": 1 + "version": 2 } \ No newline at end of file diff --git a/pkg/activator/dedupe.go b/pkg/activator/dedupe.go index 3ce2ebc1daa4..eee2dcc0a07d 100644 --- a/pkg/activator/dedupe.go +++ b/pkg/activator/dedupe.go @@ -18,6 +18,10 @@ package activator import ( "fmt" "sync" + + clientset "github.com/knative/serving/pkg/client/clientset/versioned" + "go.uber.org/zap" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) var shuttingDownError = activationResult{ @@ -39,14 +43,20 @@ type dedupingActivator struct { pendingRequests map[revisionID][]chan activationResult activator Activator shutdown bool + knaClient clientset.Interface + logger *zap.SugaredLogger + reporter StatsReporter } // NewDedupingActivator creates an Activator that deduplicates // activations requests for the same revision id and namespace. -func NewDedupingActivator(a Activator) Activator { +func NewDedupingActivator(a Activator, knaClient clientset.Interface, logger *zap.SugaredLogger, r StatsReporter) Activator { return &dedupingActivator{ pendingRequests: make(map[revisionID][]chan activationResult), activator: a, + knaClient: knaClient, + logger: logger, + reporter: r, } } @@ -88,9 +98,15 @@ func (a *dedupingActivator) dedupe(id revisionID, ch chan activationResult) { } func (a *dedupingActivator) activate(id revisionID) { - endpoint, status, err := a.activator.ActiveEndpoint(id.namespace, id.configuration, id.name) + logger := loggerWithRevisionInfo(a.logger, id.namespace, id.name) a.mux.Lock() defer a.mux.Unlock() + if reqs, ok := a.pendingRequests[id]; ok { + if err := a.reportRequests(id, len(reqs)); err != nil { + logger.Errorf("Failed to report request count metrics for revision %s for namespace %s", id.name, id.namespace) + } + } + endpoint, status, err := a.activator.ActiveEndpoint(id.namespace, id.configuration, id.name) result := activationResult{ endpoint: endpoint, status: status, @@ -103,3 +119,15 @@ func (a *dedupingActivator) activate(id revisionID) { } } } + +func (a *dedupingActivator) reportRequests(id revisionID, count int) error { + logger := loggerWithRevisionInfo(a.logger, id.namespace, id.name) + revisionClient := a.knaClient.ServingV1alpha1().Revisions(id.namespace) + revision, err := revisionClient.Get(id.name, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("Unable to get revision %s for namespace: %s", id.name, id.namespace) + } + a.reporter.ReportRequest(id.namespace, id.configuration, id.name, string(revision.Spec.ServingState), RequestCountM, float64(count)) + logger.Infof("Wrote request_count metric for revision %s for namespace %s with value %d", id.name, id.namespace, count) + return nil +} diff --git a/pkg/activator/revision.go b/pkg/activator/revision.go index 0650aea5fb72..4155c0f1e360 100644 --- a/pkg/activator/revision.go +++ b/pkg/activator/revision.go @@ -76,20 +76,16 @@ func (r *revisionActivator) ActiveEndpoint(namespace, configuration, name string switch revision.Spec.ServingState { default: - //r.reporter.Report(namespace, configuration, name, RequestCountUnknownM, 1.0) r.reporter.ReportRequest(namespace, configuration, name, "Unknown", RequestCountM, 1.0) return internalError("Disregarding activation request for revision in unknown state %v", revision.Spec.ServingState) case v1alpha1.RevisionServingStateRetired: - //r.reporter.Report(namespace, configuration, name, RequestCountRetiredM, 1.0) r.reporter.ReportRequest(namespace, configuration, name, "Retired", RequestCountM, 1.0) return internalError("Disregarding activation request for retired revision ") case v1alpha1.RevisionServingStateActive: // Revision is already active. Nothing to do - //r.reporter.Report(namespace, configuration, name, RequestCountActiveM, 1.0) - r.reporter.ReportRequest(namespace, configuration, name, "Active", RequestCountM, 1.0) + //r.reporter.ReportRequest(namespace, configuration, name, "Active", RequestCountM, 1.0) case v1alpha1.RevisionServingStateReserve: - //r.reporter.Report(namespace, configuration, name, RequestCountReserveM, 1.0) - r.reporter.ReportRequest(namespace, configuration, name, "Reserve", RequestCountM, 1.0) + //r.reporter.ReportRequest(namespace, configuration, name, "Reserve", RequestCountM, 1.0) // Activate the revision revision.Spec.ServingState = v1alpha1.RevisionServingStateActive if _, err := revisionClient.Update(revision); err != nil { diff --git a/pkg/activator/stats_reporter.go b/pkg/activator/stats_reporter.go index ed3ac6e20b9b..ed98b1a894e2 100644 --- a/pkg/activator/stats_reporter.go +++ b/pkg/activator/stats_reporter.go @@ -16,6 +16,7 @@ package activator import ( "context" "errors" + "strconv" "go.opencensus.io/stats" "go.opencensus.io/stats/view" @@ -47,8 +48,9 @@ const ( // ResponseCodeM is the response code when activator proxy the request // ResponseCodeM - // NumTriesM is the number of tries to get the response code - NumTriesM + // // NumTriesM is the number of tries to get the response code + // NumTriesM + // ResponseTimeInSecM is the response time in seconds ResponseTimeInSecM ) @@ -76,17 +78,17 @@ var ( "The number of requests that are routed to the activator", stats.UnitNone), ResponseCountM: stats.Float64( - "response_count", + "revision_response_count", "The response count when activator proxy the request", stats.UnitNone), // ResponseCodeM: stats.Float64( // "response_code", // "The response code when activator proxy the request", // stats.UnitNone), - NumTriesM: stats.Float64( - "num_tries", - "The number of tries to get the response", - stats.UnitNone), + // NumTriesM: stats.Float64( + // "num_tries", + // "The number of tries to get the response", + // stats.UnitNone), ResponseTimeInSecM: stats.Float64( "response_time_seconds", "The response time in seconds", @@ -96,9 +98,9 @@ var ( // StatsReporter defines the interface for sending activator metrics type StatsReporter interface { - Report(ns string, config string, rev string, m Measurement, v float64) error + //Report(ns string, config string, rev string, m Measurement, v float64) error ReportRequest(ns, config, rev, servingState string, m Measurement, v float64) error - ReportResponse(ns, config, rev, responseCode string, m Measurement, v float64) error + ReportResponse(ns, config, rev string, responseCode, numTries int, v float64) error } // Reporter holds cached metric objects to report autoscaler metrics @@ -109,6 +111,7 @@ type Reporter struct { revisionTagKey tag.Key servingStateKey tag.Key responseCodeKey tag.Key + numTriesKey tag.Key } // NewStatsReporter creates a reporter that collects and reports activator metrics @@ -132,53 +135,35 @@ func NewStatsReporter() (*Reporter, error) { return nil, err } r.revisionTagKey = revTag - servingStateTag, err := tag.NewKey("servingState") + servingStateTag, err := tag.NewKey("serving_state") if err != nil { return nil, err } r.servingStateKey = servingStateTag - responseCodeTag, err := tag.NewKey("responseCode") + responseCodeTag, err := tag.NewKey("response_code") if err != nil { return nil, err } r.responseCodeKey = responseCodeTag + numTriesTag, err := tag.NewKey("num_tries") + if err != nil { + return nil, err + } + r.numTriesKey = numTriesTag // Create view to see our measurements. err = view.Register( - // &view.View{ - // Description: "The number of requests that are routed to the activator when the revision is Reserve", - // Measure: measurements[RequestCountReserveM], - // Aggregation: view.Count(), - // TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, - // }, - // &view.View{ - // Description: "The number of requests that are routed to the activator when the revision is Active", - // Measure: measurements[RequestCountActiveM], - // Aggregation: view.Count(), - // TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, - // }, - // &view.View{ - // Description: "The number of requests that are routed to the activator when the revision is Retired", - // Measure: measurements[RequestCountRetiredM], - // Aggregation: view.Count(), - // TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, - // }, - // &view.View{ - // Description: "The number of requests that are routed to the activator when the revision is not Active, Reserve, and Retired", - // Measure: measurements[RequestCountUnknownM], - // Aggregation: view.Count(), - // TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, - // }, &view.View{ Description: "The number of requests that are routed to the activator", Measure: measurements[RequestCountM], - Aggregation: view.Count(), - TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.servingStateKey}, + Aggregation: view.Sum(), + //Aggregation: view.Count(), + TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.servingStateKey}, }, &view.View{ Description: "The response count when activator proxy the request", Measure: measurements[ResponseCountM], - Aggregation: view.Count(), - TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.responseCodeKey}, + Aggregation: view.Sum(), + TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.responseCodeKey, r.numTriesKey}, }, // &view.View{ // Description: "The response code when activator proxy the request", @@ -186,12 +171,6 @@ func NewStatsReporter() (*Reporter, error) { // Aggregation: view.Distribution(), // TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.responseCodeKey}, // }, - &view.View{ - Description: "The number of tries to get the response", - Measure: measurements[NumTriesM], - Aggregation: view.Count(), - TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, - }, &view.View{ Description: "The response time in seconds", Measure: measurements[ResponseTimeInSecM], @@ -247,7 +226,7 @@ func (r *Reporter) ReportRequest(ns, config, rev, servingState string, m Measure } // ReportResponse captures value v for measurement m. -func (r *Reporter) ReportResponse(ns, config, rev, responseCode string, m Measurement, v float64) error { +func (r *Reporter) ReportResponse(ns, config, rev string, responseCode, numTries int, v float64) error { if !r.initialized { return errors.New("StatsReporter is not initialized yet") } @@ -257,11 +236,12 @@ func (r *Reporter) ReportResponse(ns, config, rev, responseCode string, m Measur tag.Insert(r.namespaceTagKey, ns), tag.Insert(r.configTagKey, config), tag.Insert(r.revisionTagKey, rev), - tag.Insert(r.responseCodeKey, responseCode)) + tag.Insert(r.responseCodeKey, strconv.Itoa(responseCode)), + tag.Insert(r.numTriesKey, strconv.Itoa(numTries))) if err != nil { return err } - stats.Record(ctx, measurements[m].M(v)) + stats.Record(ctx, measurements[ResponseCountM].M(v)) return nil } From 08ed16bbed7e4c5b19cb0c1282715894a256d9e2 Mon Sep 17 00:00:00 2001 From: Yao Wu Date: Mon, 9 Jul 2018 11:16:11 -0700 Subject: [PATCH 3/9] add response time metrics --- cmd/activator/main.go | 15 ++++-- pkg/activator/stats_reporter.go | 94 ++++++++++++--------------------- 2 files changed, 43 insertions(+), 66 deletions(-) diff --git a/cmd/activator/main.go b/cmd/activator/main.go index da7fcd2af8dd..23ccc2ddf2e8 100644 --- a/cmd/activator/main.go +++ b/cmd/activator/main.go @@ -115,26 +115,30 @@ func (rrt retryRoundTripper) RoundTrip(r *http.Request) (*http.Response, error) namespace := r.Header.Get(controller.GetRevisionHeaderNamespace()) name := r.Header.Get(controller.GetRevisionHeaderName()) config := r.Header.Get(controller.GetConfigurationHeader()) - rrt.reporter.ReportResponse(namespace, config, name, resp.StatusCode, i, 1.0) + rrt.reporter.ReportResponseCount(namespace, config, name, resp.StatusCode, i, 1.0) } return resp, nil } func (a *activationHandler) handler(w http.ResponseWriter, r *http.Request) { + namespace := r.Header.Get(controller.GetRevisionHeaderNamespace()) + name := r.Header.Get(controller.GetRevisionHeaderName()) + config := r.Header.Get(controller.GetConfigurationHeader()) + start := time.Now() + if r.ContentLength > maxUploadBytes { w.WriteHeader(http.StatusRequestEntityTooLarge) + a.reporter.ReportResponseTime(namespace, config, name, time.Now().Sub(start)) return } - namespace := r.Header.Get(controller.GetRevisionHeaderNamespace()) - name := r.Header.Get(controller.GetRevisionHeaderName()) - config := r.Header.Get(controller.GetConfigurationHeader()) endpoint, status, err := a.act.ActiveEndpoint(namespace, config, name) if err != nil { msg := fmt.Sprintf("Error getting active endpoint: %v", err) http.Error(w, msg, int(status)) a.logger.Errorf(msg) - a.reporter.ReportResponse(namespace, config, name, int(status), 1, 1.0) + a.reporter.ReportResponseCount(namespace, config, name, int(status), 1, 1.0) + a.reporter.ReportResponseTime(namespace, config, name, time.Now().Sub(start)) return } target := &url.URL{ @@ -152,6 +156,7 @@ func (a *activationHandler) handler(w http.ResponseWriter, r *http.Request) { r.Host = "" proxy.ServeHTTP(w, r) + a.reporter.ReportResponseTime(namespace, config, name, time.Now().Sub(start)) } func main() { diff --git a/pkg/activator/stats_reporter.go b/pkg/activator/stats_reporter.go index ed98b1a894e2..9753e1fef7ce 100644 --- a/pkg/activator/stats_reporter.go +++ b/pkg/activator/stats_reporter.go @@ -17,6 +17,7 @@ import ( "context" "errors" "strconv" + "time" "go.opencensus.io/stats" "go.opencensus.io/stats/view" @@ -27,52 +28,18 @@ import ( type Measurement int const ( - // // RequestCountReserveM is the requests count that are routed to the activator when - // // the revision is Reserve - // RequestCountReserveM Measurement = iota - // // RequestCountActiveM is the requests count that are routed to the activator when - // // the revision is Active - // RequestCountActiveM - // // RequestCountRetiredM is the requests count that are routed to the activator when - // // the revision is Retired - // RequestCountRetiredM - // // RequestCountUnknownM is the requests count that are routed to the activator when - // // the revision is not Active, Reserve, and Retired - // RequestCountUnknownM - // RequestCountM is the requests count that are routed to the activator RequestCountM Measurement = iota //ResponseCountM is the response count when activator proxy the request ResponseCountM - // ResponseCodeM is the response code when activator proxy the request - // ResponseCodeM - - // // NumTriesM is the number of tries to get the response code - // NumTriesM - // ResponseTimeInSecM is the response time in seconds - ResponseTimeInSecM + // ResponseTimeInMsecM is the response time in millisecond + ResponseTimeInMsecM ) var ( measurements = []*stats.Float64Measure{ - // RequestCountReserveM: stats.Float64( - // "request_count_reserve", - // "The number of requests that are routed to the activator when the revision is Reserve", - // stats.UnitNone), - // RequestCountActiveM: stats.Float64( - // "request_count_active", - // "The number of requests that are routed to the activator when the revision is Active", - // stats.UnitNone), - // RequestCountRetiredM: stats.Float64( - // "request_count_retired", - // "The number of requests that are routed to the activator when the revision is Retired", - // stats.UnitNone), - // RequestCountUnknownM: stats.Float64( - // "request_count_unknown", - // "The number of requests that are routed to the activator when the revision is not Active, Reserve, and Retired", - // stats.UnitNone), RequestCountM: stats.Float64( "revision_request_count", "The number of requests that are routed to the activator", @@ -81,17 +48,9 @@ var ( "revision_response_count", "The response count when activator proxy the request", stats.UnitNone), - // ResponseCodeM: stats.Float64( - // "response_code", - // "The response code when activator proxy the request", - // stats.UnitNone), - // NumTriesM: stats.Float64( - // "num_tries", - // "The number of tries to get the response", - // stats.UnitNone), - ResponseTimeInSecM: stats.Float64( - "response_time_seconds", - "The response time in seconds", + ResponseTimeInMsecM: stats.Float64( + "response_time_msec", + "The response time in millisecond", stats.UnitNone), } ) @@ -100,7 +59,8 @@ var ( type StatsReporter interface { //Report(ns string, config string, rev string, m Measurement, v float64) error ReportRequest(ns, config, rev, servingState string, m Measurement, v float64) error - ReportResponse(ns, config, rev string, responseCode, numTries int, v float64) error + ReportResponseCount(ns, config, rev string, responseCode, numTries int, v float64) error + ReportResponseTime(ns, config, rev string, d time.Duration) error } // Reporter holds cached metric objects to report autoscaler metrics @@ -156,8 +116,7 @@ func NewStatsReporter() (*Reporter, error) { Description: "The number of requests that are routed to the activator", Measure: measurements[RequestCountM], Aggregation: view.Sum(), - //Aggregation: view.Count(), - TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.servingStateKey}, + TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.servingStateKey}, }, &view.View{ Description: "The response count when activator proxy the request", @@ -165,16 +124,10 @@ func NewStatsReporter() (*Reporter, error) { Aggregation: view.Sum(), TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.responseCodeKey, r.numTriesKey}, }, - // &view.View{ - // Description: "The response code when activator proxy the request", - // Measure: measurements[ResponseCodeM], - // Aggregation: view.Distribution(), - // TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.responseCodeKey}, - // }, &view.View{ - Description: "The response time in seconds", - Measure: measurements[ResponseTimeInSecM], - Aggregation: view.Distribution(), + Description: "The response time in millisecond", + Measure: measurements[ResponseTimeInMsecM], + Aggregation: view.Distribution(1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000), TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, }, ) @@ -225,8 +178,8 @@ func (r *Reporter) ReportRequest(ns, config, rev, servingState string, m Measure return nil } -// ReportResponse captures value v for measurement m. -func (r *Reporter) ReportResponse(ns, config, rev string, responseCode, numTries int, v float64) error { +// ReportResponseCount captures ResponseCountM metric with value v. +func (r *Reporter) ReportResponseCount(ns, config, rev string, responseCode, numTries int, v float64) error { if !r.initialized { return errors.New("StatsReporter is not initialized yet") } @@ -245,3 +198,22 @@ func (r *Reporter) ReportResponse(ns, config, rev string, responseCode, numTries stats.Record(ctx, measurements[ResponseCountM].M(v)) return nil } + +func (r *Reporter) ReportResponseTime(ns, config, rev string, d time.Duration) error { + if !r.initialized { + return errors.New("StatsReporter is not initialized yet") + } + + ctx, err := tag.New( + context.Background(), + tag.Insert(r.namespaceTagKey, ns), + tag.Insert(r.configTagKey, config), + tag.Insert(r.revisionTagKey, rev)) + if err != nil { + return err + } + + // convert time.Duration in nanoseconds to milliseconds + stats.Record(ctx, measurements[ResponseTimeInMsecM].M(float64(d/time.Millisecond))) + return nil +} From a4a4fdf27d9e50335e8e02cc16eda83c10d95f19 Mon Sep 17 00:00:00 2001 From: Yao Wu Date: Mon, 9 Jul 2018 14:01:30 -0700 Subject: [PATCH 4/9] Update the dubugging dashboard --- .../100-scaling-configmap-dev.yaml | 2062 ++++++++++------- 1 file changed, 1232 insertions(+), 830 deletions(-) diff --git a/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml b/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml index 8bf0972fd02f..a541c9c0f8fd 100644 --- a/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml +++ b/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml @@ -19,841 +19,1243 @@ metadata: namespace: monitoring data: scaling-dashboard.json: |+ - { - "__inputs": [ - { - "name": "prometheus", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.3" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Knative Serving - Scaling Debugging", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1527886043818, - "links": [ - - ], - "panels": [ - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 14, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 11, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum(autoscaler_actual_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", - "format": "time_series", - "interval": "1s", - "intervalFactor": 1, - "legendFormat": "Actual Pods", - "refId": "A" - }, - { - "expr": "sum(autoscaler_requested_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", - "format": "time_series", - "interval": "1s", - "intervalFactor": 1, - "legendFormat": "Requested Pods", - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Revision Pod Counts", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": "Concurrency", - "logBase": 1, - "max": "1", - "min": null, - "show": false - } - ] + { + "__inputs":[ + { + "name":"prometheus", + "label":"prometheus", + "description":"", + "type":"datasource", + "pluginId":"prometheus", + "pluginName":"Prometheus" } - ], - "title": "Revision Pod Counts", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 18, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 13 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Cores requested", - "refId": "A" - }, - { - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=~\"$revision-deployment-.*\"}[1m]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Cores used", - "refId": "B" - }, - { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Core limit", - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Revision CPU Usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + ], + "__requires":[ + { + "type":"grafana", + "id":"grafana", + "name":"Grafana", + "version":"5.0.3" }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 13 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Memory requested", - "refId": "A" - }, - { - "expr": "sum(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=~\"$revision-deployment-.*\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Memory used", - "refId": "B" - }, - { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", - "format": "time_series", - "intervalFactor": 1, - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Pod Memory Usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "title": "Resource Usages", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 2 - }, - "id": 16, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 3 - }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum(autoscaler_desired_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"}) ", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Desired Pods", - "refId": "A" - }, - { - "expr": "sum(autoscaler_observed_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Observed Pods", - "refId": "B" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Pod Counts", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 13 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Panic Mode", - "color": "#ea6460", - "dashes": true, - "fill": 2, - "linewidth": 2, - "steppedLine": true, - "yaxis": 2 - }, - { - "alias": "Target Concurrency Per Pod", - "color": "#0a50a1", - "dashes": true, - "steppedLine": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum(autoscaler_observed_stable_concurrency{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", - "format": "time_series", - "interval": "1s", - "intervalFactor": 1, - "legendFormat": "Stable Concurrency", - "refId": "A" - }, - { - "expr": "sum(autoscaler_observed_panic_concurrency{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", - "format": "time_series", - "interval": "1s", - "intervalFactor": 1, - "legendFormat": "Panic Concurrency", - "refId": "B" - }, - { - "expr": "sum(autoscaler_target_concurrency_per_pod{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Target Concurrency Per Pod", - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Observed Concurrency", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + { + "id":"graph", + "name":"Graph", + "type":"panel", + "version":"5.0.0" }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": null, - "fill": 1, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 22 - }, - "id": 12, - "legend": { - "avg": false, - "current": false, - "hideZero": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Panic Mode", - "color": "#e24d42", - "linewidth": 2, - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum(autoscaler_panic_mode{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"} )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Panic Mode", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Panic Mode", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "1.0", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + { + "type":"datasource", + "id":"prometheus", + "name":"Prometheus", + "version":"5.0.0" } - ], - "title": "Debugging Metrics", - "type": "row" - } - ], - "refresh": false, - "schemaVersion": 16, - "style": "dark", - "tags": [ - - ], - "templating": { - "list": [ - { - "allValue": null, - "current": { - + ], + "annotations":{ + "list":[ + { + "builtIn":1, + "datasource":"-- Grafana --", + "enable":true, + "hide":true, + "iconColor":"rgba(0, 211, 255, 1)", + "name":"Annotations & Alerts", + "type":"dashboard" + } + ] + }, + "description":"Knative Serving - Scaling Debugging", + "editable":false, + "gnetId":null, + "graphTooltip":0, + "id":null, + "iteration":1527886043818, + "links":[ + + ], + "panels":[ + { + "collapsed":true, + "gridPos":{ + "h":1, + "w":24, + "x":0, + "y":0 + }, + "id":20, + "panels":[ + { + "aliasColors":{ + + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":11, + "w":24, + "x":0, + "y":1 + }, + "id":22, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ + + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ + + ], + "spaceLength":10, + "stack":false, + "steppedLine":false, + "targets":[ + { + "expr":"label_replace(sum(increase(activator_revision_request_count{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", + "format":"time_series", + "interval":"", + "intervalFactor":1, + "legendFormat":"{{destination_revision}}", + "refId":"A" + } + ], + "thresholds":[ + + ], + "timeFrom":null, + "timeShift":null, + "title":"Request Count in last minute by Revision", + "tooltip":{ + "shared":true, + "sort":0, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ + + ] + }, + "yaxes":[ + { + "format":"none", + "label":null, + "logBase":1, + "max":null, + "min":"0", + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + } + ] + }, + { + "aliasColors":{ + + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":10, + "w":24, + "x":0, + "y":12 + }, + "id":24, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ + + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ + + ], + "spaceLength":10, + "stack":false, + "steppedLine":false, + "targets":[ + { + "expr":"round(sum(increase(activator_revision_response_count{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (response_code))", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"{{ response_code }}", + "refId":"A" + } + ], + "thresholds":[ + + ], + "timeFrom":null, + "timeShift":null, + "title":"Response Count in last minute by Response Code", + "tooltip":{ + "shared":true, + "sort":0, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ + + ] + }, + "yaxes":[ + { + "format":"none", + "label":null, + "logBase":1, + "max":null, + "min":"0", + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + } + ] + }, + { + "aliasColors":{ + + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":10, + "w":24, + "x":0, + "y":22 + }, + "id":26, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ + + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ + + ], + "spaceLength":10, + "stack":false, + "steppedLine":false, + "targets":[ + { + "expr":"sum(increase(activator_revision_response_count{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m]))", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"{{num_tries}}", + "refId":"A" + } + ], + "thresholds":[ + + ], + "timeFrom":null, + "timeShift":null, + "title":"Response Count in last minute by number of tries", + "tooltip":{ + "shared":true, + "sort":0, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ + + ] + }, + "yaxes":[ + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":"0", + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + } + ] + }, + { + "aliasColors":{ + + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":10, + "w":24, + "x":0, + "y":32 + }, + "id":28, + "legend":{ + "avg":true, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":true + }, + "lines":true, + "linewidth":1, + "links":[ + + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ + + ], + "spaceLength":10, + "stack":false, + "steppedLine":false, + "targets":[ + { + "expr":"label_replace(histogram_quantile(0.50, sum(rate(activator_response_time_msec_bucket{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision, le)), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"{{ destination_revision }} (p50)", + "refId":"A" + }, + { + "expr":"label_replace(histogram_quantile(0.90, sum(rate(activator_response_time_msec_bucket{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision, le)), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"{{ destination_revision }} (p90)", + "refId":"B" + }, + { + "expr":"label_replace(histogram_quantile(0.95, sum(rate(activator_response_time_msec_bucket{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision, le)), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"{{ destination_revision }} (p95)", + "refId":"C" + }, + { + "expr":"label_replace(histogram_quantile(0.99, sum(rate(activator_response_time_msec_bucket{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision, le)), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"{{ destination_revision }} (p99)", + "refId":"D" + } + ], + "thresholds":[ + + ], + "timeFrom":null, + "timeShift":null, + "title":"Response Time in last minute", + "tooltip":{ + "shared":true, + "sort":0, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ + + ] + }, + "yaxes":[ + { + "format":"ms", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + } + ] + } + ], + "title":"Activator Metrics", + "type":"row" }, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(autoscaler_actual_pod_count, configuration_namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - + { + "collapsed":true, + "gridPos":{ + "h":1, + "w":24, + "x":0, + "y":0 + }, + "id":14, + "panels":[ + { + "aliasColors":{ + + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":11, + "w":24, + "x":0, + "y":1 + }, + "id":2, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ + + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ + + ], + "spaceLength":10, + "stack":false, + "steppedLine":true, + "targets":[ + { + "expr":"sum(autoscaler_actual_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", + "format":"time_series", + "interval":"1s", + "intervalFactor":1, + "legendFormat":"Actual Pods", + "refId":"A" + }, + { + "expr":"sum(autoscaler_requested_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", + "format":"time_series", + "interval":"1s", + "intervalFactor":1, + "legendFormat":"Requested Pods", + "refId":"C" + } + ], + "thresholds":[ + + ], + "timeFrom":null, + "timeShift":null, + "title":"Revision Pod Counts", + "tooltip":{ + "shared":true, + "sort":0, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ + + ] + }, + "yaxes":[ + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + }, + { + "decimals":null, + "format":"short", + "label":"Concurrency", + "logBase":1, + "max":"1", + "min":null, + "show":false + } + ] + } + ], + "title":"Revision Pod Counts", + "type":"row" }, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": "Configuration", - "multi": false, - "name": "configuration", - "options": [ - - ], - "query": "label_values(autoscaler_actual_pod_count{configuration_namespace=\"$namespace\"}, configuration)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - + { + "collapsed":true, + "gridPos":{ + "h":1, + "w":24, + "x":0, + "y":1 + }, + "id":18, + "panels":[ + { + "aliasColors":{ + + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":9, + "w":12, + "x":0, + "y":13 + }, + "id":4, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ + + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ + + ], + "spaceLength":10, + "stack":false, + "steppedLine":false, + "targets":[ + { + "expr":"sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", + "format":"time_series", + "interval":"", + "intervalFactor":1, + "legendFormat":"Cores requested", + "refId":"A" + }, + { + "expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=~\"$revision-deployment-.*\"}[1m]))", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"Cores used", + "refId":"B" + }, + { + "expr":"sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"Core limit", + "refId":"C" + } + ], + "thresholds":[ + + ], + "timeFrom":null, + "timeShift":null, + "title":"Revision CPU Usage", + "tooltip":{ + "shared":true, + "sort":2, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ + + ] + }, + "yaxes":[ + { + "decimals":null, + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":false + } + ] + }, + { + "aliasColors":{ + + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":9, + "w":12, + "x":12, + "y":13 + }, + "id":6, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ + + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ + + ], + "spaceLength":10, + "stack":false, + "steppedLine":false, + "targets":[ + { + "expr":"sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", + "format":"time_series", + "interval":"", + "intervalFactor":1, + "legendFormat":"Memory requested", + "refId":"A" + }, + { + "expr":"sum(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=~\"$revision-deployment-.*\"})", + "format":"time_series", + "hide":false, + "intervalFactor":1, + "legendFormat":"Memory used", + "refId":"B" + }, + { + "expr":"sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", + "format":"time_series", + "intervalFactor":1, + "refId":"C" + } + ], + "thresholds":[ + + ], + "timeFrom":null, + "timeShift":null, + "title":"Pod Memory Usage", + "tooltip":{ + "shared":true, + "sort":2, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ + + ] + }, + "yaxes":[ + { + "format":"decbytes", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":false + } + ] + } + ], + "title":"Resource Usages", + "type":"row" }, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": "Revision", - "multi": false, - "name": "revision", - "options": [ - - ], - "query": "label_values(autoscaler_actual_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\"}, revision)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - + { + "collapsed":true, + "gridPos":{ + "h":1, + "w":24, + "x":0, + "y":2 + }, + "id":16, + "panels":[ + { + "aliasColors":{ + + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":10, + "w":24, + "x":0, + "y":3 + }, + "id":10, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ + + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ + + ], + "spaceLength":10, + "stack":false, + "steppedLine":true, + "targets":[ + { + "expr":"sum(autoscaler_desired_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"}) ", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"Desired Pods", + "refId":"A" + }, + { + "expr":"sum(autoscaler_observed_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"Observed Pods", + "refId":"B" + } + ], + "thresholds":[ + + ], + "timeFrom":null, + "timeShift":null, + "title":"Pod Counts", + "tooltip":{ + "shared":true, + "sort":0, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ + + ] + }, + "yaxes":[ + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + } + ] + }, + { + "aliasColors":{ + + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":9, + "w":24, + "x":0, + "y":13 + }, + "id":8, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ + + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ + { + "alias":"Panic Mode", + "color":"#ea6460", + "dashes":true, + "fill":2, + "linewidth":2, + "steppedLine":true, + "yaxis":2 + }, + { + "alias":"Target Concurrency Per Pod", + "color":"#0a50a1", + "dashes":true, + "steppedLine":false + } + ], + "spaceLength":10, + "stack":false, + "steppedLine":true, + "targets":[ + { + "expr":"sum(autoscaler_observed_stable_concurrency{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", + "format":"time_series", + "interval":"1s", + "intervalFactor":1, + "legendFormat":"Stable Concurrency", + "refId":"A" + }, + { + "expr":"sum(autoscaler_observed_panic_concurrency{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", + "format":"time_series", + "interval":"1s", + "intervalFactor":1, + "legendFormat":"Panic Concurrency", + "refId":"B" + }, + { + "expr":"sum(autoscaler_target_concurrency_per_pod{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"Target Concurrency Per Pod", + "refId":"C" + } + ], + "thresholds":[ + + ], + "timeFrom":null, + "timeShift":null, + "title":"Observed Concurrency", + "tooltip":{ + "shared":true, + "sort":0, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ + + ] + }, + "yaxes":[ + { + "format":"short", + "label":"", + "logBase":1, + "max":null, + "min":null, + "show":true + }, + { + "format":"short", + "label":"", + "logBase":1, + "max":null, + "min":null, + "show":false + } + ] + }, + { + "aliasColors":{ + + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "decimals":null, + "fill":1, + "gridPos":{ + "h":9, + "w":24, + "x":0, + "y":22 + }, + "id":12, + "legend":{ + "avg":false, + "current":false, + "hideZero":false, + "max":false, + "min":false, + "show":false, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ + + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ + { + "alias":"Panic Mode", + "color":"#e24d42", + "linewidth":2, + "yaxis":2 + } + ], + "spaceLength":10, + "stack":false, + "steppedLine":true, + "targets":[ + { + "expr":"sum(autoscaler_panic_mode{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"} )", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"Panic Mode", + "refId":"A" + } + ], + "thresholds":[ + + ], + "timeFrom":null, + "timeShift":null, + "title":"Panic Mode", + "tooltip":{ + "shared":true, + "sort":0, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ + + ] + }, + "yaxes":[ + { + "format":"short", + "label":null, + "logBase":1, + "max":"1.0", + "min":"0", + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":false + } + ] + } + ], + "title":"Debugging Metrics", + "type":"row" + } + ], + "refresh":false, + "schemaVersion":16, + "style":"dark", + "tags":[ + + ], + "templating":{ + "list":[ + { + "allValue":null, + "current":{ + + }, + "datasource":"prometheus", + "hide":0, + "includeAll":false, + "label":"Namespace", + "multi":false, + "name":"namespace", + "options":[ + + ], + "query":"label_values(autoscaler_actual_pod_count, configuration_namespace)", + "refresh":1, + "regex":"", + "sort":1, + "tagValuesQuery":"", + "tags":[ + + ], + "tagsQuery":"", + "type":"query", + "useTags":false + }, + { + "allValue":null, + "current":{ + + }, + "datasource":"prometheus", + "hide":0, + "includeAll":false, + "label":"Configuration", + "multi":false, + "name":"configuration", + "options":[ + + ], + "query":"label_values(autoscaler_actual_pod_count{configuration_namespace=\"$namespace\"}, configuration)", + "refresh":1, + "regex":"", + "sort":1, + "tagValuesQuery":"", + "tags":[ + + ], + "tagsQuery":"", + "type":"query", + "useTags":false + }, + { + "allValue":null, + "current":{ + + }, + "datasource":"prometheus", + "hide":0, + "includeAll":false, + "label":"Revision", + "multi":false, + "name":"revision", + "options":[ + + ], + "query":"label_values(autoscaler_actual_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\"}, revision)", + "refresh":1, + "regex":"", + "sort":2, + "tagValuesQuery":"", + "tags":[ + + ], + "tagsQuery":"", + "type":"query", + "useTags":false + } + ] + }, + "time":{ + "from":"now-15m", + "to":"now" + }, + "timepicker":{ + "refresh_intervals":[ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-15m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Knative Serving - Scaling Debugging", - "uid": "u_-9SIMiz", - "version": 2 - } \ No newline at end of file + "time_options":[ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone":"", + "title":"Knative Serving - Scaling Debugging", + "uid":"u_-9SIMiz", + "version":2 + } From 0e2c1fce21c409022ee759cf44fc79db904cfe92 Mon Sep 17 00:00:00 2001 From: Yao Wu Date: Mon, 9 Jul 2018 15:13:50 -0700 Subject: [PATCH 5/9] adjust dashboard order --- .../100-scaling-configmap-dev.yaml | 1413 ++++++++--------- pkg/activator/revision.go | 2 - 2 files changed, 662 insertions(+), 753 deletions(-) diff --git a/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml b/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml index a541c9c0f8fd..9118de4f9bc2 100644 --- a/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml +++ b/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml @@ -74,6 +74,7 @@ data: ], "panels":[ { + "collapsed":true, "gridPos":{ "h":1, @@ -81,392 +82,335 @@ data: "x":0, "y":0 }, - "id":20, + "id":14, "panels":[ - { - "aliasColors":{ + { + "aliasColors":{ - }, - "bars":false, - "dashLength":10, - "dashes":false, - "datasource":"prometheus", - "fill":1, - "gridPos":{ - "h":11, - "w":24, - "x":0, - "y":1 - }, - "id":22, - "legend":{ - "avg":false, - "current":false, - "max":false, - "min":false, - "show":true, - "total":false, - "values":false - }, - "lines":true, - "linewidth":1, - "links":[ + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":11, + "w":24, + "x":0, + "y":1 + }, + "id":2, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ - ], - "nullPointMode":"null", - "percentage":false, - "pointradius":5, - "points":false, - "renderer":"flot", - "seriesOverrides":[ + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ - ], - "spaceLength":10, - "stack":false, - "steppedLine":false, - "targets":[ - { - "expr":"label_replace(sum(increase(activator_revision_request_count{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", - "format":"time_series", - "interval":"", - "intervalFactor":1, - "legendFormat":"{{destination_revision}}", - "refId":"A" - } - ], - "thresholds":[ + ], + "spaceLength":10, + "stack":false, + "steppedLine":true, + "targets":[ + { + "expr":"sum(autoscaler_actual_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", + "format":"time_series", + "interval":"1s", + "intervalFactor":1, + "legendFormat":"Actual Pods", + "refId":"A" + }, + { + "expr":"sum(autoscaler_requested_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", + "format":"time_series", + "interval":"1s", + "intervalFactor":1, + "legendFormat":"Requested Pods", + "refId":"C" + } + ], + "thresholds":[ - ], - "timeFrom":null, - "timeShift":null, - "title":"Request Count in last minute by Revision", - "tooltip":{ - "shared":true, - "sort":0, - "value_type":"individual" - }, - "type":"graph", - "xaxis":{ - "buckets":null, - "mode":"time", - "name":null, - "show":true, - "values":[ + ], + "timeFrom":null, + "timeShift":null, + "title":"Revision Pod Counts", + "tooltip":{ + "shared":true, + "sort":0, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ - ] - }, - "yaxes":[ - { - "format":"none", - "label":null, - "logBase":1, - "max":null, - "min":"0", - "show":true - }, - { - "format":"short", - "label":null, - "logBase":1, - "max":null, - "min":null, - "show":true - } - ] - }, - { - "aliasColors":{ + ] + }, + "yaxes":[ + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + }, + { + "decimals":null, + "format":"short", + "label":"Concurrency", + "logBase":1, + "max":"1", + "min":null, + "show":false + } + ] + } + ], + "title":"Revision Pod Counts", + "type":"row" + }, + { + "collapsed":true, + "gridPos":{ + "h":1, + "w":24, + "x":0, + "y":1 + }, + "id":18, + "panels":[ + { + "aliasColors":{ - }, - "bars":false, - "dashLength":10, - "dashes":false, - "datasource":"prometheus", - "fill":1, - "gridPos":{ - "h":10, - "w":24, - "x":0, - "y":12 - }, - "id":24, - "legend":{ - "avg":false, - "current":false, - "max":false, - "min":false, - "show":true, - "total":false, - "values":false - }, - "lines":true, - "linewidth":1, - "links":[ + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":9, + "w":12, + "x":0, + "y":13 + }, + "id":4, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ - ], - "nullPointMode":"null", - "percentage":false, - "pointradius":5, - "points":false, - "renderer":"flot", - "seriesOverrides":[ - - ], - "spaceLength":10, - "stack":false, - "steppedLine":false, - "targets":[ - { - "expr":"round(sum(increase(activator_revision_response_count{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (response_code))", - "format":"time_series", - "intervalFactor":1, - "legendFormat":"{{ response_code }}", - "refId":"A" - } - ], - "thresholds":[ + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ - ], - "timeFrom":null, - "timeShift":null, - "title":"Response Count in last minute by Response Code", - "tooltip":{ - "shared":true, - "sort":0, - "value_type":"individual" - }, - "type":"graph", - "xaxis":{ - "buckets":null, - "mode":"time", - "name":null, - "show":true, - "values":[ + ], + "spaceLength":10, + "stack":false, + "steppedLine":false, + "targets":[ + { + "expr":"sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", + "format":"time_series", + "interval":"", + "intervalFactor":1, + "legendFormat":"Cores requested", + "refId":"A" + }, + { + "expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=~\"$revision-deployment-.*\"}[1m]))", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"Cores used", + "refId":"B" + }, + { + "expr":"sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"Core limit", + "refId":"C" + } + ], + "thresholds":[ - ] - }, - "yaxes":[ - { - "format":"none", - "label":null, - "logBase":1, - "max":null, - "min":"0", - "show":true - }, - { - "format":"short", - "label":null, - "logBase":1, - "max":null, - "min":null, - "show":true - } - ] - }, - { - "aliasColors":{ + ], + "timeFrom":null, + "timeShift":null, + "title":"Revision CPU Usage", + "tooltip":{ + "shared":true, + "sort":2, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ - }, - "bars":false, - "dashLength":10, - "dashes":false, - "datasource":"prometheus", - "fill":1, - "gridPos":{ - "h":10, - "w":24, - "x":0, - "y":22 - }, - "id":26, - "legend":{ - "avg":false, - "current":false, - "max":false, - "min":false, - "show":true, - "total":false, - "values":false - }, - "lines":true, - "linewidth":1, - "links":[ + ] + }, + "yaxes":[ + { + "decimals":null, + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":false + } + ] + }, + { + "aliasColors":{ - ], - "nullPointMode":"null", - "percentage":false, - "pointradius":5, - "points":false, - "renderer":"flot", - "seriesOverrides":[ + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":9, + "w":12, + "x":12, + "y":13 + }, + "id":6, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ - ], - "spaceLength":10, - "stack":false, - "steppedLine":false, - "targets":[ - { - "expr":"sum(increase(activator_revision_response_count{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m]))", - "format":"time_series", - "intervalFactor":1, - "legendFormat":"{{num_tries}}", - "refId":"A" - } - ], - "thresholds":[ + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ - ], - "timeFrom":null, - "timeShift":null, - "title":"Response Count in last minute by number of tries", - "tooltip":{ - "shared":true, - "sort":0, - "value_type":"individual" - }, - "type":"graph", - "xaxis":{ - "buckets":null, - "mode":"time", - "name":null, - "show":true, - "values":[ + ], + "spaceLength":10, + "stack":false, + "steppedLine":false, + "targets":[ + { + "expr":"sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", + "format":"time_series", + "interval":"", + "intervalFactor":1, + "legendFormat":"Memory requested", + "refId":"A" + }, + { + "expr":"sum(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=~\"$revision-deployment-.*\"})", + "format":"time_series", + "hide":false, + "intervalFactor":1, + "legendFormat":"Memory used", + "refId":"B" + }, + { + "expr":"sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", + "format":"time_series", + "intervalFactor":1, + "refId":"C" + } + ], + "thresholds":[ - ] - }, - "yaxes":[ - { - "format":"short", - "label":null, - "logBase":1, - "max":null, - "min":"0", - "show":true - }, - { - "format":"short", - "label":null, - "logBase":1, - "max":null, - "min":null, - "show":true - } - ] - }, - { - "aliasColors":{ - - }, - "bars":false, - "dashLength":10, - "dashes":false, - "datasource":"prometheus", - "fill":1, - "gridPos":{ - "h":10, - "w":24, - "x":0, - "y":32 - }, - "id":28, - "legend":{ - "avg":true, - "current":false, - "max":false, - "min":false, - "show":true, - "total":false, - "values":true - }, - "lines":true, - "linewidth":1, - "links":[ - - ], - "nullPointMode":"null", - "percentage":false, - "pointradius":5, - "points":false, - "renderer":"flot", - "seriesOverrides":[ - - ], - "spaceLength":10, - "stack":false, - "steppedLine":false, - "targets":[ - { - "expr":"label_replace(histogram_quantile(0.50, sum(rate(activator_response_time_msec_bucket{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision, le)), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", - "format":"time_series", - "intervalFactor":1, - "legendFormat":"{{ destination_revision }} (p50)", - "refId":"A" - }, - { - "expr":"label_replace(histogram_quantile(0.90, sum(rate(activator_response_time_msec_bucket{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision, le)), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", - "format":"time_series", - "intervalFactor":1, - "legendFormat":"{{ destination_revision }} (p90)", - "refId":"B" - }, - { - "expr":"label_replace(histogram_quantile(0.95, sum(rate(activator_response_time_msec_bucket{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision, le)), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", - "format":"time_series", - "intervalFactor":1, - "legendFormat":"{{ destination_revision }} (p95)", - "refId":"C" - }, - { - "expr":"label_replace(histogram_quantile(0.99, sum(rate(activator_response_time_msec_bucket{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision, le)), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", - "format":"time_series", - "intervalFactor":1, - "legendFormat":"{{ destination_revision }} (p99)", - "refId":"D" - } - ], - "thresholds":[ - - ], - "timeFrom":null, - "timeShift":null, - "title":"Response Time in last minute", - "tooltip":{ - "shared":true, - "sort":0, - "value_type":"individual" - }, - "type":"graph", - "xaxis":{ - "buckets":null, - "mode":"time", - "name":null, - "show":true, - "values":[ + ], + "timeFrom":null, + "timeShift":null, + "title":"Pod Memory Usage", + "tooltip":{ + "shared":true, + "sort":2, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ - ] - }, - "yaxes":[ - { - "format":"ms", - "label":null, - "logBase":1, - "max":null, - "min":null, - "show":true - }, - { - "format":"short", - "label":null, - "logBase":1, - "max":null, - "min":null, - "show":true - } - ] - } + ] + }, + "yaxes":[ + { + "format":"decbytes", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":false + } + ] + } ], - "title":"Activator Metrics", + "title":"Resource Usages", "type":"row" }, { @@ -475,9 +419,9 @@ data: "h":1, "w":24, "x":0, - "y":0 + "y":2 }, - "id":14, + "id":16, "panels":[ { "aliasColors":{ @@ -489,12 +433,12 @@ data: "datasource":"prometheus", "fill":1, "gridPos":{ - "h":11, + "h":10, "w":24, "x":0, - "y":1 + "y":3 }, - "id":2, + "id":10, "legend":{ "avg":false, "current":false, @@ -522,20 +466,18 @@ data: "steppedLine":true, "targets":[ { - "expr":"sum(autoscaler_actual_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", + "expr":"sum(autoscaler_desired_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"}) ", "format":"time_series", - "interval":"1s", "intervalFactor":1, - "legendFormat":"Actual Pods", + "legendFormat":"Desired Pods", "refId":"A" }, { - "expr":"sum(autoscaler_requested_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", + "expr":"sum(autoscaler_observed_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", "format":"time_series", - "interval":"1s", "intervalFactor":1, - "legendFormat":"Requested Pods", - "refId":"C" + "legendFormat":"Observed Pods", + "refId":"B" } ], "thresholds":[ @@ -543,7 +485,7 @@ data: ], "timeFrom":null, "timeShift":null, - "title":"Revision Pod Counts", + "title":"Pod Counts", "tooltip":{ "shared":true, "sort":0, @@ -569,30 +511,15 @@ data: "show":true }, { - "decimals":null, "format":"short", - "label":"Concurrency", + "label":null, "logBase":1, - "max":"1", + "max":null, "min":null, - "show":false + "show":true } ] - } - ], - "title":"Revision Pod Counts", - "type":"row" - }, - { - "collapsed":true, - "gridPos":{ - "h":1, - "w":24, - "x":0, - "y":1 - }, - "id":18, - "panels":[ + }, { "aliasColors":{ @@ -604,11 +531,11 @@ data: "fill":1, "gridPos":{ "h":9, - "w":12, + "w":24, "x":0, "y":13 }, - "id":4, + "id":8, "legend":{ "avg":false, "current":false, @@ -629,32 +556,47 @@ data: "points":false, "renderer":"flot", "seriesOverrides":[ - + { + "alias":"Panic Mode", + "color":"#ea6460", + "dashes":true, + "fill":2, + "linewidth":2, + "steppedLine":true, + "yaxis":2 + }, + { + "alias":"Target Concurrency Per Pod", + "color":"#0a50a1", + "dashes":true, + "steppedLine":false + } ], "spaceLength":10, "stack":false, - "steppedLine":false, + "steppedLine":true, "targets":[ { - "expr":"sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", + "expr":"sum(autoscaler_observed_stable_concurrency{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", "format":"time_series", - "interval":"", + "interval":"1s", "intervalFactor":1, - "legendFormat":"Cores requested", + "legendFormat":"Stable Concurrency", "refId":"A" }, { - "expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=~\"$revision-deployment-.*\"}[1m]))", + "expr":"sum(autoscaler_observed_panic_concurrency{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", "format":"time_series", + "interval":"1s", "intervalFactor":1, - "legendFormat":"Cores used", + "legendFormat":"Panic Concurrency", "refId":"B" }, { - "expr":"sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", + "expr":"sum(autoscaler_target_concurrency_per_pod{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", "format":"time_series", "intervalFactor":1, - "legendFormat":"Core limit", + "legendFormat":"Target Concurrency Per Pod", "refId":"C" } ], @@ -663,10 +605,10 @@ data: ], "timeFrom":null, "timeShift":null, - "title":"Revision CPU Usage", + "title":"Observed Concurrency", "tooltip":{ "shared":true, - "sort":2, + "sort":0, "value_type":"individual" }, "type":"graph", @@ -681,9 +623,8 @@ data: }, "yaxes":[ { - "decimals":null, "format":"short", - "label":null, + "label":"", "logBase":1, "max":null, "min":null, @@ -691,7 +632,7 @@ data: }, { "format":"short", - "label":null, + "label":"", "logBase":1, "max":null, "min":null, @@ -707,20 +648,22 @@ data: "dashLength":10, "dashes":false, "datasource":"prometheus", + "decimals":null, "fill":1, "gridPos":{ "h":9, - "w":12, - "x":12, - "y":13 + "w":24, + "x":0, + "y":22 }, - "id":6, + "id":12, "legend":{ "avg":false, "current":false, + "hideZero":false, "max":false, "min":false, - "show":true, + "show":false, "total":false, "values":false }, @@ -735,33 +678,23 @@ data: "points":false, "renderer":"flot", "seriesOverrides":[ - + { + "alias":"Panic Mode", + "color":"#e24d42", + "linewidth":2, + "yaxis":2 + } ], "spaceLength":10, "stack":false, - "steppedLine":false, + "steppedLine":true, "targets":[ { - "expr":"sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", + "expr":"sum(autoscaler_panic_mode{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"} )", "format":"time_series", - "interval":"", "intervalFactor":1, - "legendFormat":"Memory requested", + "legendFormat":"Panic Mode", "refId":"A" - }, - { - "expr":"sum(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=~\"$revision-deployment-.*\"})", - "format":"time_series", - "hide":false, - "intervalFactor":1, - "legendFormat":"Memory used", - "refId":"B" - }, - { - "expr":"sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", - "format":"time_series", - "intervalFactor":1, - "refId":"C" } ], "thresholds":[ @@ -769,10 +702,10 @@ data: ], "timeFrom":null, "timeShift":null, - "title":"Pod Memory Usage", + "title":"Panic Mode", "tooltip":{ "shared":true, - "sort":2, + "sort":0, "value_type":"individual" }, "type":"graph", @@ -787,11 +720,11 @@ data: }, "yaxes":[ { - "format":"decbytes", + "format":"short", "label":null, "logBase":1, - "max":null, - "min":null, + "max":"1.0", + "min":"0", "show":true }, { @@ -805,7 +738,7 @@ data: ] } ], - "title":"Resource Usages", + "title":"Autoscaler Metrics", "type":"row" }, { @@ -814,326 +747,304 @@ data: "h":1, "w":24, "x":0, - "y":2 + "y":3 }, - "id":16, + "id":20, "panels":[ - { - "aliasColors":{ + { + "aliasColors":{ - }, - "bars":false, - "dashLength":10, - "dashes":false, - "datasource":"prometheus", - "fill":1, - "gridPos":{ - "h":10, - "w":24, - "x":0, - "y":3 - }, - "id":10, - "legend":{ - "avg":false, - "current":false, - "max":false, - "min":false, - "show":true, - "total":false, - "values":false - }, - "lines":true, - "linewidth":1, - "links":[ + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":11, + "w":24, + "x":0, + "y":1 + }, + "id":22, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ - ], - "nullPointMode":"null", - "percentage":false, - "pointradius":5, - "points":false, - "renderer":"flot", - "seriesOverrides":[ + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ - ], - "spaceLength":10, - "stack":false, - "steppedLine":true, - "targets":[ - { - "expr":"sum(autoscaler_desired_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"}) ", - "format":"time_series", - "intervalFactor":1, - "legendFormat":"Desired Pods", - "refId":"A" - }, - { - "expr":"sum(autoscaler_observed_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", - "format":"time_series", - "intervalFactor":1, - "legendFormat":"Observed Pods", - "refId":"B" - } - ], - "thresholds":[ + ], + "spaceLength":10, + "stack":false, + "steppedLine":false, + "targets":[ + { + "expr":"label_replace(sum(increase(activator_revision_request_count{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", + "format":"time_series", + "interval":"", + "intervalFactor":1, + "legendFormat":"{{destination_revision}}", + "refId":"A" + } + ], + "thresholds":[ - ], - "timeFrom":null, - "timeShift":null, - "title":"Pod Counts", - "tooltip":{ - "shared":true, - "sort":0, - "value_type":"individual" - }, - "type":"graph", - "xaxis":{ - "buckets":null, - "mode":"time", - "name":null, - "show":true, - "values":[ + ], + "timeFrom":null, + "timeShift":null, + "title":"Request Count in last minute by Revision", + "tooltip":{ + "shared":true, + "sort":0, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ - ] - }, - "yaxes":[ - { - "format":"short", - "label":null, - "logBase":1, - "max":null, - "min":null, - "show":true - }, - { - "format":"short", - "label":null, - "logBase":1, - "max":null, - "min":null, - "show":true - } - ] - }, - { - "aliasColors":{ + ] + }, + "yaxes":[ + { + "format":"none", + "label":null, + "logBase":1, + "max":null, + "min":"0", + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + } + ] + }, + { + "aliasColors":{ - }, - "bars":false, - "dashLength":10, - "dashes":false, - "datasource":"prometheus", - "fill":1, - "gridPos":{ - "h":9, - "w":24, - "x":0, - "y":13 - }, - "id":8, - "legend":{ - "avg":false, - "current":false, - "max":false, - "min":false, - "show":true, - "total":false, - "values":false - }, - "lines":true, - "linewidth":1, - "links":[ + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":10, + "w":24, + "x":0, + "y":12 + }, + "id":24, + "legend":{ + "avg":false, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":false + }, + "lines":true, + "linewidth":1, + "links":[ - ], - "nullPointMode":"null", - "percentage":false, - "pointradius":5, - "points":false, - "renderer":"flot", - "seriesOverrides":[ - { - "alias":"Panic Mode", - "color":"#ea6460", - "dashes":true, - "fill":2, - "linewidth":2, - "steppedLine":true, - "yaxis":2 - }, - { - "alias":"Target Concurrency Per Pod", - "color":"#0a50a1", - "dashes":true, - "steppedLine":false - } - ], - "spaceLength":10, - "stack":false, - "steppedLine":true, - "targets":[ - { - "expr":"sum(autoscaler_observed_stable_concurrency{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", - "format":"time_series", - "interval":"1s", - "intervalFactor":1, - "legendFormat":"Stable Concurrency", - "refId":"A" - }, - { - "expr":"sum(autoscaler_observed_panic_concurrency{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", - "format":"time_series", - "interval":"1s", - "intervalFactor":1, - "legendFormat":"Panic Concurrency", - "refId":"B" - }, - { - "expr":"sum(autoscaler_target_concurrency_per_pod{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", - "format":"time_series", - "intervalFactor":1, - "legendFormat":"Target Concurrency Per Pod", - "refId":"C" - } - ], - "thresholds":[ + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ - ], - "timeFrom":null, - "timeShift":null, - "title":"Observed Concurrency", - "tooltip":{ - "shared":true, - "sort":0, - "value_type":"individual" - }, - "type":"graph", - "xaxis":{ - "buckets":null, - "mode":"time", - "name":null, - "show":true, - "values":[ + ], + "spaceLength":10, + "stack":false, + "steppedLine":false, + "targets":[ + { + "expr":"round(sum(increase(activator_revision_response_count{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (response_code))", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"{{ response_code }}", + "refId":"A" + } + ], + "thresholds":[ - ] - }, - "yaxes":[ - { - "format":"short", - "label":"", - "logBase":1, - "max":null, - "min":null, - "show":true - }, - { - "format":"short", - "label":"", - "logBase":1, - "max":null, - "min":null, - "show":false - } - ] - }, - { - "aliasColors":{ + ], + "timeFrom":null, + "timeShift":null, + "title":"Response Count in last minute by Response Code", + "tooltip":{ + "shared":true, + "sort":0, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ - }, - "bars":false, - "dashLength":10, - "dashes":false, - "datasource":"prometheus", - "decimals":null, - "fill":1, - "gridPos":{ - "h":9, - "w":24, - "x":0, - "y":22 - }, - "id":12, - "legend":{ - "avg":false, - "current":false, - "hideZero":false, - "max":false, - "min":false, - "show":false, - "total":false, - "values":false - }, - "lines":true, - "linewidth":1, - "links":[ + ] + }, + "yaxes":[ + { + "format":"none", + "label":null, + "logBase":1, + "max":null, + "min":"0", + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + } + ] + }, + { + "aliasColors":{ - ], - "nullPointMode":"null", - "percentage":false, - "pointradius":5, - "points":false, - "renderer":"flot", - "seriesOverrides":[ - { - "alias":"Panic Mode", - "color":"#e24d42", - "linewidth":2, - "yaxis":2 - } - ], - "spaceLength":10, - "stack":false, - "steppedLine":true, - "targets":[ - { - "expr":"sum(autoscaler_panic_mode{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"} )", - "format":"time_series", - "intervalFactor":1, - "legendFormat":"Panic Mode", - "refId":"A" - } - ], - "thresholds":[ + }, + "bars":false, + "dashLength":10, + "dashes":false, + "datasource":"prometheus", + "fill":1, + "gridPos":{ + "h":10, + "w":24, + "x":0, + "y":32 + }, + "id":28, + "legend":{ + "avg":true, + "current":false, + "max":false, + "min":false, + "show":true, + "total":false, + "values":true + }, + "lines":true, + "linewidth":1, + "links":[ - ], - "timeFrom":null, - "timeShift":null, - "title":"Panic Mode", - "tooltip":{ - "shared":true, - "sort":0, - "value_type":"individual" - }, - "type":"graph", - "xaxis":{ - "buckets":null, - "mode":"time", - "name":null, - "show":true, - "values":[ + ], + "nullPointMode":"null", + "percentage":false, + "pointradius":5, + "points":false, + "renderer":"flot", + "seriesOverrides":[ - ] - }, - "yaxes":[ - { - "format":"short", - "label":null, - "logBase":1, - "max":"1.0", - "min":"0", - "show":true - }, - { - "format":"short", - "label":null, - "logBase":1, - "max":null, - "min":null, - "show":false - } - ] - } + ], + "spaceLength":10, + "stack":false, + "steppedLine":false, + "targets":[ + { + "expr":"label_replace(histogram_quantile(0.50, sum(rate(activator_response_time_msec_bucket{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision, le)), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"{{ destination_revision }} (p50)", + "refId":"A" + }, + { + "expr":"label_replace(histogram_quantile(0.90, sum(rate(activator_response_time_msec_bucket{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision, le)), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"{{ destination_revision }} (p90)", + "refId":"B" + }, + { + "expr":"label_replace(histogram_quantile(0.95, sum(rate(activator_response_time_msec_bucket{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision, le)), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"{{ destination_revision }} (p95)", + "refId":"C" + }, + { + "expr":"label_replace(histogram_quantile(0.99, sum(rate(activator_response_time_msec_bucket{destination_namespace=\"$namespace\", destination_configuration=~\"$configuration\",destination_revision=~\"$revision\"}[1m])) by (destination_revision, le)), \"destination_revision\", \"$2\", \"destination_revision\", \"$configuration(-+)(.*)\")", + "format":"time_series", + "intervalFactor":1, + "legendFormat":"{{ destination_revision }} (p99)", + "refId":"D" + } + ], + "thresholds":[ + + ], + "timeFrom":null, + "timeShift":null, + "title":"Response Time in last minute", + "tooltip":{ + "shared":true, + "sort":0, + "value_type":"individual" + }, + "type":"graph", + "xaxis":{ + "buckets":null, + "mode":"time", + "name":null, + "show":true, + "values":[ + + ] + }, + "yaxes":[ + { + "format":"ms", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + }, + { + "format":"short", + "label":null, + "logBase":1, + "max":null, + "min":null, + "show":true + } + ] + } ], - "title":"Debugging Metrics", + "title":"Activator Metrics", "type":"row" } ], diff --git a/pkg/activator/revision.go b/pkg/activator/revision.go index 4155c0f1e360..9ab7104dcfe5 100644 --- a/pkg/activator/revision.go +++ b/pkg/activator/revision.go @@ -83,9 +83,7 @@ func (r *revisionActivator) ActiveEndpoint(namespace, configuration, name string return internalError("Disregarding activation request for retired revision ") case v1alpha1.RevisionServingStateActive: // Revision is already active. Nothing to do - //r.reporter.ReportRequest(namespace, configuration, name, "Active", RequestCountM, 1.0) case v1alpha1.RevisionServingStateReserve: - //r.reporter.ReportRequest(namespace, configuration, name, "Reserve", RequestCountM, 1.0) // Activate the revision revision.Spec.ServingState = v1alpha1.RevisionServingStateActive if _, err := revisionClient.Update(revision); err != nil { From f96f43f183a5df235502327bcec851b871dec0c6 Mon Sep 17 00:00:00 2001 From: Yao Wu Date: Mon, 9 Jul 2018 15:28:48 -0700 Subject: [PATCH 6/9] simplify interface --- pkg/activator/dedupe.go | 2 +- pkg/activator/revision.go | 4 ++-- pkg/activator/stats_reporter.go | 7 +++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pkg/activator/dedupe.go b/pkg/activator/dedupe.go index eee2dcc0a07d..9bbbb551385c 100644 --- a/pkg/activator/dedupe.go +++ b/pkg/activator/dedupe.go @@ -127,7 +127,7 @@ func (a *dedupingActivator) reportRequests(id revisionID, count int) error { if err != nil { return fmt.Errorf("Unable to get revision %s for namespace: %s", id.name, id.namespace) } - a.reporter.ReportRequest(id.namespace, id.configuration, id.name, string(revision.Spec.ServingState), RequestCountM, float64(count)) + a.reporter.ReportRequest(id.namespace, id.configuration, id.name, string(revision.Spec.ServingState), float64(count)) logger.Infof("Wrote request_count metric for revision %s for namespace %s with value %d", id.name, id.namespace, count) return nil } diff --git a/pkg/activator/revision.go b/pkg/activator/revision.go index 9ab7104dcfe5..492b9cc4191b 100644 --- a/pkg/activator/revision.go +++ b/pkg/activator/revision.go @@ -76,10 +76,10 @@ func (r *revisionActivator) ActiveEndpoint(namespace, configuration, name string switch revision.Spec.ServingState { default: - r.reporter.ReportRequest(namespace, configuration, name, "Unknown", RequestCountM, 1.0) + r.reporter.ReportRequest(namespace, configuration, name, "Unknown", 1.0) return internalError("Disregarding activation request for revision in unknown state %v", revision.Spec.ServingState) case v1alpha1.RevisionServingStateRetired: - r.reporter.ReportRequest(namespace, configuration, name, "Retired", RequestCountM, 1.0) + r.reporter.ReportRequest(namespace, configuration, name, string(v1alpha1.RevisionServingStateRetired), 1.0) return internalError("Disregarding activation request for retired revision ") case v1alpha1.RevisionServingStateActive: // Revision is already active. Nothing to do diff --git a/pkg/activator/stats_reporter.go b/pkg/activator/stats_reporter.go index 9753e1fef7ce..05051c854dd1 100644 --- a/pkg/activator/stats_reporter.go +++ b/pkg/activator/stats_reporter.go @@ -57,8 +57,7 @@ var ( // StatsReporter defines the interface for sending activator metrics type StatsReporter interface { - //Report(ns string, config string, rev string, m Measurement, v float64) error - ReportRequest(ns, config, rev, servingState string, m Measurement, v float64) error + ReportRequest(ns, config, rev, servingState string, v float64) error ReportResponseCount(ns, config, rev string, responseCode, numTries int, v float64) error ReportResponseTime(ns, config, rev string, d time.Duration) error } @@ -159,7 +158,7 @@ func (r *Reporter) Report(ns string, config string, rev string, m Measurement, v } // reportRequest captures value v for measurement m. -func (r *Reporter) ReportRequest(ns, config, rev, servingState string, m Measurement, v float64) error { +func (r *Reporter) ReportRequest(ns, config, rev, servingState string, v float64) error { if !r.initialized { return errors.New("StatsReporter is not initialized yet") } @@ -174,7 +173,7 @@ func (r *Reporter) ReportRequest(ns, config, rev, servingState string, m Measure return err } - stats.Record(ctx, measurements[m].M(v)) + stats.Record(ctx, measurements[RequestCountM].M(v)) return nil } From 6c8530c0f1cb7d285bc3977b1ffd8cc185996edb Mon Sep 17 00:00:00 2001 From: Yao Wu Date: Tue, 10 Jul 2018 15:35:20 -0700 Subject: [PATCH 7/9] add unit test --- cmd/activator/main.go | 9 +- .../100-scaling-configmap-dev.yaml | 6 +- pkg/activator/dedupe.go | 45 ++++--- pkg/activator/dedupe_test.go | 91 +++++++++----- pkg/activator/revision_test.go | 15 ++- pkg/activator/stats_reporter.go | 9 +- pkg/activator/stats_reporter_test.go | 117 ++++++++++++++---- 7 files changed, 207 insertions(+), 85 deletions(-) diff --git a/cmd/activator/main.go b/cmd/activator/main.go index 23ccc2ddf2e8..8b6dad18e38c 100644 --- a/cmd/activator/main.go +++ b/cmd/activator/main.go @@ -57,6 +57,7 @@ type activationHandler struct { type retryRoundTripper struct { logger *zap.SugaredLogger reporter activator.StatsReporter + start time.Time } func (rrt retryRoundTripper) RoundTrip(r *http.Request) (*http.Response, error) { @@ -116,6 +117,7 @@ func (rrt retryRoundTripper) RoundTrip(r *http.Request) (*http.Response, error) name := r.Header.Get(controller.GetRevisionHeaderName()) config := r.Header.Get(controller.GetConfigurationHeader()) rrt.reporter.ReportResponseCount(namespace, config, name, resp.StatusCode, i, 1.0) + rrt.reporter.ReportResponseTime(namespace, config, name, resp.StatusCode, time.Now().Sub(rrt.start)) } return resp, nil } @@ -128,7 +130,8 @@ func (a *activationHandler) handler(w http.ResponseWriter, r *http.Request) { if r.ContentLength > maxUploadBytes { w.WriteHeader(http.StatusRequestEntityTooLarge) - a.reporter.ReportResponseTime(namespace, config, name, time.Now().Sub(start)) + a.reporter.ReportResponseCount(namespace, config, name, http.StatusRequestEntityTooLarge, 1, 1.0) + a.reporter.ReportResponseTime(namespace, config, name, http.StatusRequestEntityTooLarge, time.Now().Sub(start)) return } @@ -138,7 +141,7 @@ func (a *activationHandler) handler(w http.ResponseWriter, r *http.Request) { http.Error(w, msg, int(status)) a.logger.Errorf(msg) a.reporter.ReportResponseCount(namespace, config, name, int(status), 1, 1.0) - a.reporter.ReportResponseTime(namespace, config, name, time.Now().Sub(start)) + a.reporter.ReportResponseTime(namespace, config, name, int(status), time.Now().Sub(start)) return } target := &url.URL{ @@ -149,6 +152,7 @@ func (a *activationHandler) handler(w http.ResponseWriter, r *http.Request) { proxy.Transport = retryRoundTripper{ logger: a.logger, reporter: a.reporter, + start: start, } // TODO: Clear the host to avoid 404's. @@ -156,7 +160,6 @@ func (a *activationHandler) handler(w http.ResponseWriter, r *http.Request) { r.Host = "" proxy.ServeHTTP(w, r) - a.reporter.ReportResponseTime(namespace, config, name, time.Now().Sub(start)) } func main() { diff --git a/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml b/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml index 9118de4f9bc2..64c92e89841c 100644 --- a/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml +++ b/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml @@ -581,7 +581,7 @@ data: "format":"time_series", "interval":"1s", "intervalFactor":1, - "legendFormat":"Stable Concurrency", + "legendFormat":"60 Second Average Concurrency", "refId":"A" }, { @@ -589,14 +589,14 @@ data: "format":"time_series", "interval":"1s", "intervalFactor":1, - "legendFormat":"Panic Concurrency", + "legendFormat":"6 Second Average Panic Concurrency", "refId":"B" }, { "expr":"sum(autoscaler_target_concurrency_per_pod{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"})", "format":"time_series", "intervalFactor":1, - "legendFormat":"Target Concurrency Per Pod", + "legendFormat":"60 Second Target Concurrency", "refId":"C" } ], diff --git a/pkg/activator/dedupe.go b/pkg/activator/dedupe.go index 9bbbb551385c..c7534754b845 100644 --- a/pkg/activator/dedupe.go +++ b/pkg/activator/dedupe.go @@ -99,20 +99,31 @@ func (a *dedupingActivator) dedupe(id revisionID, ch chan activationResult) { func (a *dedupingActivator) activate(id revisionID) { logger := loggerWithRevisionInfo(a.logger, id.namespace, id.name) - a.mux.Lock() - defer a.mux.Unlock() - if reqs, ok := a.pendingRequests[id]; ok { - if err := a.reportRequests(id, len(reqs)); err != nil { - logger.Errorf("Failed to report request count metrics for revision %s for namespace %s", id.name, id.namespace) - } + revisionClient := a.knaClient.ServingV1alpha1().Revisions(id.namespace) + revision, err := revisionClient.Get(id.name, metav1.GetOptions{}) + // default serving state is unknown + state := "Unknown" + if err != nil { + logger.Errorf("Failed to get revision %s for namespace: %s", id.name, id.namespace) } + state = string(revision.Spec.ServingState) + + // if reqs, ok := a.pendingRequests[id]; ok { + // if err := a.reportRequests(id, len(reqs)); err != nil { + // logger.Errorf("Failed to report request count metrics for revision %s for namespace %s", id.name, id.namespace) + // } + // } endpoint, status, err := a.activator.ActiveEndpoint(id.namespace, id.configuration, id.name) + a.mux.Lock() + defer a.mux.Unlock() result := activationResult{ endpoint: endpoint, status: status, err: err, } if reqs, ok := a.pendingRequests[id]; ok { + a.reporter.ReportRequest(id.namespace, id.configuration, id.name, state, float64(len(reqs))) + logger.Infof("Wrote request_count metric for revision %s for namespace %s with value %d", id.name, id.namespace, len(reqs)) delete(a.pendingRequests, id) for _, ch := range reqs { ch <- result @@ -120,14 +131,14 @@ func (a *dedupingActivator) activate(id revisionID) { } } -func (a *dedupingActivator) reportRequests(id revisionID, count int) error { - logger := loggerWithRevisionInfo(a.logger, id.namespace, id.name) - revisionClient := a.knaClient.ServingV1alpha1().Revisions(id.namespace) - revision, err := revisionClient.Get(id.name, metav1.GetOptions{}) - if err != nil { - return fmt.Errorf("Unable to get revision %s for namespace: %s", id.name, id.namespace) - } - a.reporter.ReportRequest(id.namespace, id.configuration, id.name, string(revision.Spec.ServingState), float64(count)) - logger.Infof("Wrote request_count metric for revision %s for namespace %s with value %d", id.name, id.namespace, count) - return nil -} +// func (a *dedupingActivator) reportRequests(id revisionID, count int) error { +// logger := loggerWithRevisionInfo(a.logger, id.namespace, id.name) +// revisionClient := a.knaClient.ServingV1alpha1().Revisions(id.namespace) +// revision, err := revisionClient.Get(id.name, metav1.GetOptions{}) +// if err != nil { +// return fmt.Errorf("Unable to get revision %s for namespace: %s", id.name, id.namespace) +// } +// a.reporter.ReportRequest(id.namespace, id.configuration, id.name, string(revision.Spec.ServingState), float64(count)) +// logger.Infof("Wrote request_count metric for revision %s for namespace %s with value %d", id.name, id.namespace, count) +// return nil +// } diff --git a/pkg/activator/dedupe_test.go b/pkg/activator/dedupe_test.go index f03359fe4abe..28b79c071345 100644 --- a/pkg/activator/dedupe_test.go +++ b/pkg/activator/dedupe_test.go @@ -22,21 +22,28 @@ import ( "sync" "testing" "time" + + "github.com/knative/serving/pkg/apis/serving/v1alpha1" + . "github.com/knative/serving/pkg/logging/testing" ) func TestSingleRevision_SingleRequest_Success(t *testing.T) { + _, kna := fakeClients() + kna.ServingV1alpha1().Revisions(testNamespace).Create( + newRevisionBuilder(). + withServingState(v1alpha1.RevisionServingStateReserve).build()) want := Endpoint{"ip", 8080} f := newFakeActivator(t, map[revisionID]activationResult{ - revisionID{"default", "config", "rev1"}: activationResult{ + revisionID{testNamespace, testConfiguration, testRevision}: activationResult{ endpoint: want, status: Status(0), err: nil, }, }) - d := NewDedupingActivator(Activator(f)) + d := NewDedupingActivator(Activator(f), kna, TestLogger(t), &mockReporter{}) - endpoint, status, err := d.ActiveEndpoint("default", "config", "rev1") + endpoint, status, err := d.ActiveEndpoint(testNamespace, testConfiguration, testRevision) if err != nil { t.Errorf("Unexpected error: %v", err) @@ -53,20 +60,24 @@ func TestSingleRevision_SingleRequest_Success(t *testing.T) { } func TestSingleRevision_MultipleRequests_Success(t *testing.T) { + _, kna := fakeClients() + kna.ServingV1alpha1().Revisions(testNamespace).Create( + newRevisionBuilder(). + withServingState(v1alpha1.RevisionServingStateReserve).build()) ep := Endpoint{"ip", 8080} f := newFakeActivator(t, map[revisionID]activationResult{ - revisionID{"default", "config", "rev1"}: activationResult{ + revisionID{testNamespace, testConfiguration, testRevision}: activationResult{ endpoint: ep, status: Status(0), err: nil, }, }) - d := NewDedupingActivator(f) + d := NewDedupingActivator(f, kna, TestLogger(t), &mockReporter{}) got := concurrentTest(d, f, []revisionID{ - revisionID{"default", "config", "rev1"}, - revisionID{"default", "config", "rev1"}, + revisionID{testNamespace, testConfiguration, testRevision}, + revisionID{testNamespace, testConfiguration, testRevision}, }) want := []activationResult{ @@ -82,28 +93,33 @@ func TestSingleRevision_MultipleRequests_Success(t *testing.T) { } func TestMultipleRevisions_MultipleRequests_Success(t *testing.T) { + _, kna := fakeClients() + kna.ServingV1alpha1().Revisions(testNamespace).Create( + newRevisionBuilder().withRevisionName("rev1").build()) + kna.ServingV1alpha1().Revisions(testNamespace).Create( + newRevisionBuilder().withRevisionName("rev2").build()) ep1 := Endpoint{"ip1", 8080} ep2 := Endpoint{"ip2", 8080} f := newFakeActivator(t, map[revisionID]activationResult{ - revisionID{"default", "config", "rev1"}: activationResult{ + revisionID{testNamespace, testConfiguration, "rev1"}: activationResult{ endpoint: ep1, status: Status(0), err: nil, }, - revisionID{"default", "config", "rev2"}: activationResult{ + revisionID{testNamespace, testConfiguration, "rev2"}: activationResult{ endpoint: ep2, status: Status(0), err: nil, }, }) - d := NewDedupingActivator(f) + d := NewDedupingActivator(f, kna, TestLogger(t), &mockReporter{}) got := concurrentTest(d, f, []revisionID{ - revisionID{"default", "config", "rev1"}, - revisionID{"default", "config", "rev2"}, - revisionID{"default", "config", "rev1"}, - revisionID{"default", "config", "rev2"}, + revisionID{testNamespace, testConfiguration, "rev1"}, + revisionID{testNamespace, testConfiguration, "rev2"}, + revisionID{testNamespace, testConfiguration, "rev1"}, + revisionID{testNamespace, testConfiguration, "rev2"}, }) want := []activationResult{ @@ -116,34 +132,39 @@ func TestMultipleRevisions_MultipleRequests_Success(t *testing.T) { t.Errorf("Unexpected results. \nWant %+v. \nGot %+v", want, got) } if len(f.record) != 2 { - t.Errorf("Unexpected number of activation requests. Want 2. Got %v.", len(f.record)) + t.Errorf("Unexpected number of activation requests. Want 2. Got %v. %v", len(f.record), f.record) } } func TestMultipleRevisions_MultipleRequests_PartialSuccess(t *testing.T) { + _, kna := fakeClients() + kna.ServingV1alpha1().Revisions(testNamespace).Create( + newRevisionBuilder().withRevisionName("rev1").build()) + kna.ServingV1alpha1().Revisions(testNamespace).Create( + newRevisionBuilder().withRevisionName("rev2").build()) ep1 := Endpoint{"ip1", 8080} status2 := Status(http.StatusInternalServerError) error2 := fmt.Errorf("test error") f := newFakeActivator(t, map[revisionID]activationResult{ - revisionID{"default", "config", "rev1"}: activationResult{ + revisionID{testNamespace, testConfiguration, "rev1"}: activationResult{ endpoint: ep1, status: Status(0), err: nil, }, - revisionID{"default", "config", "rev2"}: activationResult{ + revisionID{testNamespace, testConfiguration, "rev2"}: activationResult{ endpoint: Endpoint{}, status: status2, err: error2, }, }) - d := NewDedupingActivator(f) + d := NewDedupingActivator(f, kna, TestLogger(t), &mockReporter{}) got := concurrentTest(d, f, []revisionID{ - revisionID{"default", "config", "rev1"}, - revisionID{"default", "config", "rev2"}, - revisionID{"default", "config", "rev1"}, - revisionID{"default", "config", "rev2"}, + revisionID{testNamespace, testConfiguration, "rev1"}, + revisionID{testNamespace, testConfiguration, "rev2"}, + revisionID{testNamespace, testConfiguration, "rev1"}, + revisionID{testNamespace, testConfiguration, "rev2"}, }) want := []activationResult{ @@ -161,21 +182,25 @@ func TestMultipleRevisions_MultipleRequests_PartialSuccess(t *testing.T) { } func TestSingleRevision_MultipleRequests_FailureRecovery(t *testing.T) { + _, kna := fakeClients() + kna.ServingV1alpha1().Revisions(testNamespace).Create( + newRevisionBuilder(). + withServingState(v1alpha1.RevisionServingStateReserve).build()) failEp := Endpoint{} failStatus := Status(503) failErr := fmt.Errorf("test error") f := newFakeActivator(t, map[revisionID]activationResult{ - revisionID{"default", "config", "rev1"}: activationResult{ + revisionID{testNamespace, testConfiguration, testRevision}: activationResult{ endpoint: failEp, status: failStatus, err: failErr, }, }) - d := NewDedupingActivator(Activator(f)) + d := NewDedupingActivator(Activator(f), kna, TestLogger(t), &mockReporter{}) // Activation initially fails - endpoint, status, err := d.ActiveEndpoint("default", "config", "rev1") + endpoint, status, err := d.ActiveEndpoint(testNamespace, testConfiguration, testRevision) if err != failErr { t.Errorf("Unexpected error. Want %v. Got %v.", failErr, err) @@ -193,13 +218,13 @@ func TestSingleRevision_MultipleRequests_FailureRecovery(t *testing.T) { // Later activation succeeds successEp := Endpoint{"ip", 8080} successStatus := Status(0) - f.responses[revisionID{"default", "config", "rev1"}] = activationResult{ + f.responses[revisionID{testNamespace, testConfiguration, testRevision}] = activationResult{ endpoint: successEp, status: successStatus, err: nil, } - endpoint, status, err = d.ActiveEndpoint("default", "config", "rev1") + endpoint, status, err = d.ActiveEndpoint(testNamespace, testConfiguration, testRevision) if err != nil { t.Errorf("Unexpected error. Want %v. Got %v.", nil, err) @@ -216,23 +241,27 @@ func TestSingleRevision_MultipleRequests_FailureRecovery(t *testing.T) { } func TestShutdown_ReturnError(t *testing.T) { + _, kna := fakeClients() + kna.ServingV1alpha1().Revisions(testNamespace).Create( + newRevisionBuilder(). + withServingState(v1alpha1.RevisionServingStateReserve).build()) ep := Endpoint{"ip", 8080} f := newFakeActivator(t, map[revisionID]activationResult{ - revisionID{"default", "config", "rev1"}: activationResult{ + revisionID{testNamespace, testConfiguration, testRevision}: activationResult{ endpoint: ep, status: Status(0), err: nil, }, }) - d := NewDedupingActivator(Activator(f)) - f.hold(revisionID{"default", "config", "rev1"}) + d := NewDedupingActivator(Activator(f), kna, TestLogger(t), &mockReporter{}) + f.hold(revisionID{testNamespace, testConfiguration, testRevision}) go func() { time.Sleep(100 * time.Millisecond) d.Shutdown() }() - endpoint, status, err := d.ActiveEndpoint("default", "config", "rev1") + endpoint, status, err := d.ActiveEndpoint(testNamespace, testConfiguration, testRevision) want := Endpoint{} if endpoint != want { diff --git a/pkg/activator/revision_test.go b/pkg/activator/revision_test.go index a33cc38f6190..c7771df6548b 100644 --- a/pkg/activator/revision_test.go +++ b/pkg/activator/revision_test.go @@ -40,7 +40,15 @@ const ( type mockReporter struct{} -func (r *mockReporter) Report(ns string, config string, rev string, m Measurement, v float64) error { +func (r *mockReporter) ReportRequest(ns, config, rev, servingState string, v float64) error { + return nil +} + +func (r *mockReporter) ReportResponseCount(ns, config, rev string, responseCode, numTries int, v float64) error { + return nil +} + +func (r *mockReporter) ReportResponseTime(ns, config, rev string, responseCode int, d time.Duration) error { return nil } @@ -250,6 +258,11 @@ func (b *revisionBuilder) build() *v1alpha1.Revision { return b.revision } +func (b *revisionBuilder) withRevisionName(name string) *revisionBuilder { + b.revision.ObjectMeta.Name = name + return b +} + func (b *revisionBuilder) withServingState(servingState v1alpha1.RevisionServingStateType) *revisionBuilder { b.revision.Spec.ServingState = servingState return b diff --git a/pkg/activator/stats_reporter.go b/pkg/activator/stats_reporter.go index 05051c854dd1..15aa28567382 100644 --- a/pkg/activator/stats_reporter.go +++ b/pkg/activator/stats_reporter.go @@ -59,7 +59,7 @@ var ( type StatsReporter interface { ReportRequest(ns, config, rev, servingState string, v float64) error ReportResponseCount(ns, config, rev string, responseCode, numTries int, v float64) error - ReportResponseTime(ns, config, rev string, d time.Duration) error + ReportResponseTime(ns, config, rev string, responseCode int, d time.Duration) error } // Reporter holds cached metric objects to report autoscaler metrics @@ -127,7 +127,7 @@ func NewStatsReporter() (*Reporter, error) { Description: "The response time in millisecond", Measure: measurements[ResponseTimeInMsecM], Aggregation: view.Distribution(1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000), - TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey}, + TagKeys: []tag.Key{r.namespaceTagKey, r.configTagKey, r.revisionTagKey, r.responseCodeKey}, }, ) if err != nil { @@ -198,7 +198,7 @@ func (r *Reporter) ReportResponseCount(ns, config, rev string, responseCode, num return nil } -func (r *Reporter) ReportResponseTime(ns, config, rev string, d time.Duration) error { +func (r *Reporter) ReportResponseTime(ns, config, rev string, responseCode int, d time.Duration) error { if !r.initialized { return errors.New("StatsReporter is not initialized yet") } @@ -207,7 +207,8 @@ func (r *Reporter) ReportResponseTime(ns, config, rev string, d time.Duration) e context.Background(), tag.Insert(r.namespaceTagKey, ns), tag.Insert(r.configTagKey, config), - tag.Insert(r.revisionTagKey, rev)) + tag.Insert(r.revisionTagKey, rev), + tag.Insert(r.responseCodeKey, strconv.Itoa(responseCode))) if err != nil { return err } diff --git a/pkg/activator/stats_reporter_test.go b/pkg/activator/stats_reporter_test.go index 495f22af7722..fc9db9528839 100644 --- a/pkg/activator/stats_reporter_test.go +++ b/pkg/activator/stats_reporter_test.go @@ -15,61 +15,126 @@ package activator import ( "testing" + "time" "go.opencensus.io/stats/view" ) -// var expectedType = map[string]struct{}{ -// "LastValueData": view.LastValueData, -// "CountData": view.CountData, -// } - -func TestActivatorReporter_Report(t *testing.T) { +func TestActivatorReporter(t *testing.T) { r := &Reporter{} - if err := r.Report("testNs", "testConfig", "testRev", RequestCountReserveM, 1); err == nil { - t.Error("Reporter.Report() expected an error for Report call before init. Got success.") + if err := r.ReportRequest("testns", "testconfig", "testrev", "Reserved", 1); err == nil { + t.Error("Reporter expected an error for Report call before init. Got success.") + } + if err := r.ReportResponseCount("testns", "testconfig", "testrev", 200, 1, 1); err == nil { + t.Error("Reporter expected an error for Report call before init. Got success.") + } + + var err error + if r, err = NewStatsReporter(); err != nil { + t.Error("Failed to create a new reporter.") + } + + // test ReportRequest + wantTags1 := map[string]string{ + "destination_namespace": "testns", + "destination_configuration": "testconfig", + "destination_revision": "testrev", + "serving_state": "Reserved", + } + expectSuccess(t, func() error { return r.ReportRequest("testns", "testconfig", "testrev", "Reserved", 1) }) + expectSuccess(t, func() error { return r.ReportRequest("testns", "testconfig", "testrev", "Reserved", 2.0) }) + checkSumData(t, "revision_request_count", wantTags1, 3) + + // test ReportResponseCount + wantTags2 := map[string]string{ + "destination_namespace": "testns", + "destination_configuration": "testconfig", + "destination_revision": "testrev", + "response_code": "200", + "num_tries": "6", } + expectSuccess(t, func() error { return r.ReportResponseCount("testns", "testconfig", "testrev", 200, 6, 1) }) + expectSuccess(t, func() error { return r.ReportResponseCount("testns", "testconfig", "testrev", 200, 6, 3) }) + checkSumData(t, "revision_response_count", wantTags2, 4) - r, _ = NewStatsReporter() - wantTags := map[string]string{ - "configuration_namespace": "testns", - "configuration": "testconfig", - "revision": "testrev", + // test ReportResponseTime + wantTags3 := map[string]string{ + "destination_namespace": "testns", + "destination_configuration": "testconfig", + "destination_revision": "testrev", + "response_code": "200", } - expectSuccess(t, func() error { return r.Report("testns", "testconfig", "testrev", RequestCountReserveM, 1) }) - expectSuccess(t, func() error { return r.Report("testns", "testconfig", "testrev", RequestCountReserveM, 1) }) - checkData(t, "request_count_reserve", wantTags, 2) + expectSuccess(t, func() error { + return r.ReportResponseTime("testns", "testconfig", "testrev", 200, 1100*time.Millisecond) + }) + expectSuccess(t, func() error { + return r.ReportResponseTime("testns", "testconfig", "testrev", 200, 9100*time.Millisecond) + }) + checkDistributionData(t, "response_time_msec", wantTags3, 2, 1100, 9100) } func expectSuccess(t *testing.T, f func() error) { if err := f(); err != nil { - t.Errorf("Reporter.Report() expected success but got error %v", err) + t.Errorf("Reporter expected success but got error %v", err) + } +} + +func checkSumData(t *testing.T, name string, wantTags map[string]string, wantValue int) { + if d, err := view.RetrieveData(name); err != nil { + t.Errorf("Reporter error = %v, wantErr %v", err, false) + } else { + if len(d) != 1 { + t.Errorf("Reporter len(d) %v, want %v", len(d), 1) + } + for _, got := range d[0].Tags { + if want, ok := wantTags[got.Key.Name()]; !ok { + t.Errorf("Reporter got an extra tag %v: %v", got.Key.Name(), got.Value) + } else { + if got.Value != want { + t.Errorf("Reporter expected a different tag value. key:%v, got: %v, want: %v", got.Key.Name(), got.Value, want) + } + } + } + + if s, ok := d[0].Data.(*view.SumData); !ok { + t.Error("Reporter expected a SumData type") + } else { + if s.Value != (float64)(wantValue) { + t.Errorf("Reporter expected %v got %v. metric: %v", (int64)(wantValue), s.Value, name) + } + } } } -func checkData(t *testing.T, name string, wantTags map[string]string, wantValue int) { +func checkDistributionData(t *testing.T, name string, wantTags map[string]string, expectedCount int, expectedMin float64, expectedMax float64) { if d, err := view.RetrieveData(name); err != nil { - t.Errorf("Reporter.Report() error = %v, wantErr %v", err, false) + t.Errorf("Reporter error = %v, wantErr %v", err, false) } else { if len(d) != 1 { - t.Errorf("Reporter.Report() len(d) %v, want %v", len(d), 1) + t.Errorf("Reporter len(d) %v, want %v", len(d), 1) } for _, got := range d[0].Tags { if want, ok := wantTags[got.Key.Name()]; !ok { - t.Errorf("Reporter.Report() got an extra tag %v: %v", got.Key.Name(), got.Value) + t.Errorf("Reporter got an extra tag %v: %v", got.Key.Name(), got.Value) } else { if got.Value != want { - t.Errorf("Reporter.Report() expected a different tag value. key:%v, got: %v, want: %v", got.Key.Name(), got.Value, want) + t.Errorf("Reporter expected a different tag value. key:%v, got: %v, want: %v", got.Key.Name(), got.Value, want) } } } - if s, ok := d[0].Data.(*view.CountData); !ok { - t.Error("Reporter.Report() expected a CountData type") + if s, ok := d[0].Data.(*view.DistributionData); !ok { + t.Error("Reporter expected a DistributionData type") } else { - if s.Value != (int64)(wantValue) { - t.Errorf("Reporter.Report() expected %v got %v. metric: %v", (int64)(wantValue), s.Value, name) + if s.Count != int64(expectedCount) { + t.Errorf("Reporter expected count %v got %v. metric: %v", (int64)(expectedCount), s.Count, name) + } + if s.Min != float64(expectedMin) { + t.Errorf("Reporter expected min %v got %v. metric: %v", expectedMin, s.Min, name) + } + if s.Max != float64(expectedMax) { + t.Errorf("Reporter expected max %v got %v. metric: %v", expectedMax, s.Max, name) } } } From e30b737205278a8aefa174fd29a630d94301ef5e Mon Sep 17 00:00:00 2001 From: Yao Wu Date: Tue, 10 Jul 2018 15:41:30 -0700 Subject: [PATCH 8/9] minor cleanup --- pkg/activator/dedupe.go | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/pkg/activator/dedupe.go b/pkg/activator/dedupe.go index c7534754b845..078d71bf6814 100644 --- a/pkg/activator/dedupe.go +++ b/pkg/activator/dedupe.go @@ -108,11 +108,6 @@ func (a *dedupingActivator) activate(id revisionID) { } state = string(revision.Spec.ServingState) - // if reqs, ok := a.pendingRequests[id]; ok { - // if err := a.reportRequests(id, len(reqs)); err != nil { - // logger.Errorf("Failed to report request count metrics for revision %s for namespace %s", id.name, id.namespace) - // } - // } endpoint, status, err := a.activator.ActiveEndpoint(id.namespace, id.configuration, id.name) a.mux.Lock() defer a.mux.Unlock() @@ -130,15 +125,3 @@ func (a *dedupingActivator) activate(id revisionID) { } } } - -// func (a *dedupingActivator) reportRequests(id revisionID, count int) error { -// logger := loggerWithRevisionInfo(a.logger, id.namespace, id.name) -// revisionClient := a.knaClient.ServingV1alpha1().Revisions(id.namespace) -// revision, err := revisionClient.Get(id.name, metav1.GetOptions{}) -// if err != nil { -// return fmt.Errorf("Unable to get revision %s for namespace: %s", id.name, id.namespace) -// } -// a.reporter.ReportRequest(id.namespace, id.configuration, id.name, string(revision.Spec.ServingState), float64(count)) -// logger.Infof("Wrote request_count metric for revision %s for namespace %s with value %d", id.name, id.namespace, count) -// return nil -// } From d3b88fd34f4c7eba00b725e145024122a28d785a Mon Sep 17 00:00:00 2001 From: Yao Wu Date: Wed, 11 Jul 2018 00:00:04 -0700 Subject: [PATCH 9/9] minor change --- cmd/activator/main.go | 2 +- pkg/activator/stats_reporter.go | 19 ------------------- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/cmd/activator/main.go b/cmd/activator/main.go index 8b6dad18e38c..f71e2df55e57 100644 --- a/cmd/activator/main.go +++ b/cmd/activator/main.go @@ -119,7 +119,7 @@ func (rrt retryRoundTripper) RoundTrip(r *http.Request) (*http.Response, error) rrt.reporter.ReportResponseCount(namespace, config, name, resp.StatusCode, i, 1.0) rrt.reporter.ReportResponseTime(namespace, config, name, resp.StatusCode, time.Now().Sub(rrt.start)) } - return resp, nil + return resp, err } func (a *activationHandler) handler(w http.ResponseWriter, r *http.Request) { diff --git a/pkg/activator/stats_reporter.go b/pkg/activator/stats_reporter.go index 15aa28567382..6b0b7a58d971 100644 --- a/pkg/activator/stats_reporter.go +++ b/pkg/activator/stats_reporter.go @@ -138,25 +138,6 @@ func NewStatsReporter() (*Reporter, error) { return r, nil } -// Report captures value v for measurement m. The revision rev is in namespace ns and its owner is config -func (r *Reporter) Report(ns string, config string, rev string, m Measurement, v float64) error { - if !r.initialized { - return errors.New("StatsReporter is not initialized yet") - } - - ctx, err := tag.New( - context.Background(), - tag.Insert(r.namespaceTagKey, ns), - tag.Insert(r.configTagKey, config), - tag.Insert(r.revisionTagKey, rev)) - if err != nil { - return err - } - - stats.Record(ctx, measurements[m].M(v)) - return nil -} - // reportRequest captures value v for measurement m. func (r *Reporter) ReportRequest(ns, config, rev, servingState string, v float64) error { if !r.initialized {