diff --git a/metrics/metricstest/metricstest.go b/metrics/metricstest/metricstest.go new file mode 100644 index 0000000000..91416da26c --- /dev/null +++ b/metrics/metricstest/metricstest.go @@ -0,0 +1,161 @@ +/* +Copyright 2019 The Knative Authors +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metricstest + +import ( + "testing" + + "go.opencensus.io/stats/view" +) + +// CheckStatsReported checks that there is a view registered with the given name for each string in names, +// and that each view has at least one record. +func CheckStatsReported(t *testing.T, names ...string) { + t.Helper() + for _, name := range names { + d, err := view.RetrieveData(name) + if err != nil { + t.Errorf("For metric %s: Reporter.Report() error = %v", name, err) + } + if len(d) < 1 { + t.Errorf("For metric %s: No data reported when data was expected, view data is empty.", name) + } + } +} + +// CheckStatsNotReported checks that there are no records for any views that a name matching a string in names. +// Names that do not match registered views are considered not reported. +func CheckStatsNotReported(t *testing.T, names ...string) { + t.Helper() + for _, name := range names { + d, err := view.RetrieveData(name) + // err == nil means a valid stat exists matching "name" + // len(d) > 0 means a component recorded metrics for that stat + if err == nil && len(d) > 0 { + t.Errorf("For metric %s: Unexpected data reported when no data was expected. Reporter len(d) = %d", name, len(d)) + } + } +} + +// CheckCountData checks the view with a name matching string name to verify that the CountData stats +// reported are tagged with the tags in wantTags and that wantValue matches reported count. +func CheckCountData(t *testing.T, name string, wantTags map[string]string, wantValue int64) { + t.Helper() + if row := checkExactlyOneRow(t, name, wantTags); row != nil { + checkRowTags(t, row, name, wantTags) + + if s, ok := row.Data.(*view.CountData); !ok { + t.Errorf("For metric %s: Reporter expected a CountData type", name) + } else if s.Value != wantValue { + t.Errorf("For metric %s: value = %v, want: %d", name, s.Value, wantValue) + } + } +} + +// CheckDistributionData checks the view with a name matching string name to verify that the DistributionData stats reported +// are tagged with the tags in wantTags and that expectedCount number of records were reported. +// It also checks that expectedMin and expectedMax match the minimum and maximum reported values, respectively. +func CheckDistributionData(t *testing.T, name string, wantTags map[string]string, expectedCount int64, expectedMin float64, expectedMax float64) { + t.Helper() + if row := checkExactlyOneRow(t, name, wantTags); row != nil { + checkRowTags(t, row, name, wantTags) + + if s, ok := row.Data.(*view.DistributionData); !ok { + t.Errorf("For metric %s: Reporter expected a DistributionData type", name) + } else { + if s.Count != expectedCount { + t.Errorf("For metric %s: reporter count = %d, want = %d", name, s.Count, expectedCount) + } + if s.Min != expectedMin { + t.Errorf("For metric %s: reporter count = %f, want = %f", name, s.Min, expectedMin) + } + if s.Max != expectedMax { + t.Errorf("For metric %s: reporter count = %f, want = %f", name, s.Max, expectedMax) + } + } + } +} + +// CheckLastValueData checks the view with a name matching string name to verify that the LastValueData stats +// reported are tagged with the tags in wantTags and that wantValue matches reported last value. +func CheckLastValueData(t *testing.T, name string, wantTags map[string]string, wantValue float64) { + t.Helper() + if row := checkExactlyOneRow(t, name, wantTags); row != nil { + checkRowTags(t, row, name, wantTags) + + if s, ok := row.Data.(*view.LastValueData); !ok { + t.Errorf("For metric %s: Reporter.Report() expected a LastValueData type", name) + } else if s.Value != wantValue { + t.Errorf("For metric %s: Reporter.Report() expected %v got %v", name, s.Value, wantValue) + } + } +} + +// CheckSumData checks the view with a name matching string name to verify that the SumData stats +// reported are tagged with the tags in wantTags and that wantValue matches the reported sum. +func CheckSumData(t *testing.T, name string, wantTags map[string]string, wantValue float64) { + t.Helper() + if row := checkExactlyOneRow(t, name, wantTags); row != nil { + checkRowTags(t, row, name, wantTags) + + if s, ok := row.Data.(*view.SumData); !ok { + t.Errorf("For metric %s: Reporter expected a SumData type", name) + } else if s.Value != wantValue { + t.Errorf("For metric %s: value = %v, want: %v", name, s.Value, wantValue) + } + } +} + +// Unregister unregisters the metrics that were registered. +// This is useful for testing since golang execute test iterations within the same process and +// opencensus views maintain global state. At the beginning of each test, tests should +// unregister for all metrics and then re-register for the same metrics. This effectively clears +// out any existing data and avoids a panic due to re-registering a metric. +// +// In normal process shutdown, metrics do not need to be unregistered. +func Unregister(names ...string) { + for _, n := range names { + if v := view.Find(n); v != nil { + view.Unregister(v) + } + } +} + +func checkExactlyOneRow(t *testing.T, name string, wantTags map[string]string) *view.Row { + t.Helper() + d, err := view.RetrieveData(name) + if err != nil { + t.Errorf("For metric %s: Reporter.Report() error = %v", name, err) + return nil + } + if len(d) != 1 { + t.Errorf("For metric %s: Reporter.Report() len(d)=%v, want 1", name, len(d)) + } + + return d[0] +} + +func checkRowTags(t *testing.T, row *view.Row, name string, wantTags map[string]string) { + t.Helper() + for _, got := range row.Tags { + n := got.Key.Name() + if want, ok := wantTags[n]; !ok { + t.Errorf("For metric %s: Reporter got an extra tag %v: %v", name, n, got.Value) + } else if got.Value != want { + t.Errorf("For metric %s: Reporter expected a different tag value for key: %s, got: %s, want: %s", name, n, got.Value, want) + } + } +} diff --git a/metrics/record.go b/metrics/record.go index 4e435ee511..1b045ea0ab 100644 --- a/metrics/record.go +++ b/metrics/record.go @@ -54,3 +54,14 @@ func Record(ctx context.Context, ms stats.Measurement) { stats.Record(ctx, ms) } } + +// Buckets125 generates an array of buckets with approximate powers-of-two +// buckets that also aligns with powers of 10 on every 3rd step. This can +// be used to create a view.Distribution. +func Buckets125(low, high float64) []float64 { + buckets := []float64{low} + for last := low; last < high; last = last * 10 { + buckets = append(buckets, 2*last, 5*last, 10*last) + } + return buckets +} diff --git a/metrics/record_test.go b/metrics/record_test.go index 8e8ffd01c2..5f1b84f734 100644 --- a/metrics/record_test.go +++ b/metrics/record_test.go @@ -20,6 +20,8 @@ import ( "context" "testing" + "knative.dev/pkg/metrics/metricstest" + "go.opencensus.io/stats" "go.opencensus.io/stats/view" ) @@ -69,7 +71,7 @@ func TestRecord(t *testing.T) { for _, test := range shouldReportCases { setCurMetricsConfig(test.metricsConfig) Record(ctx, test.measurement) - checkLastValueData(t, test.measurement.Measure().Name(), test.measurement.Value()) + metricstest.CheckLastValueData(t, test.measurement.Measure().Name(), map[string]string{}, test.measurement.Value()) } shouldNotReportCases := []struct { @@ -91,30 +93,6 @@ func TestRecord(t *testing.T) { for _, test := range shouldNotReportCases { setCurMetricsConfig(test.metricsConfig) Record(ctx, test.measurement) - checkLastValueData(t, test.measurement.Measure().Name(), 4) // The value is still the last one of shouldReportCases - } -} - -func checkLastValueData(t *testing.T, name string, wantValue float64) { - t.Helper() - if row := checkRow(t, name); row != nil { - if s, ok := row.Data.(*view.LastValueData); !ok { - t.Error("Reporter.Report() expected a LastValueData type") - } else if s.Value != wantValue { - t.Errorf("Reporter.Report() expected %v got %v. metric: %v", s.Value, wantValue, name) - } - } -} - -func checkRow(t *testing.T, name string) *view.Row { - t.Helper() - d, err := view.RetrieveData(name) - if err != nil { - t.Fatalf("Reporter.Report() error = %v, wantErr %v", err, false) - return nil - } - if len(d) != 1 { - t.Fatalf("Reporter.Report() len(d)=%v, want 1", len(d)) + metricstest.CheckLastValueData(t, test.measurement.Measure().Name(), map[string]string{}, 4) // The value is still the last one of shouldReportCases } - return d[0] } diff --git a/webhook/stats_reporter.go b/webhook/stats_reporter.go new file mode 100644 index 0000000000..a1bd4d5356 --- /dev/null +++ b/webhook/stats_reporter.go @@ -0,0 +1,151 @@ +/* +Copyright 2019 The Knative Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package webhook + +import ( + "context" + "strconv" + "time" + + "go.opencensus.io/stats" + "go.opencensus.io/stats/view" + "go.opencensus.io/tag" + admissionv1beta1 "k8s.io/api/admission/v1beta1" + "knative.dev/pkg/metrics" +) + +const ( + requestCountName = "request_count" + requestLatenciesName = "request_latencies" +) + +var ( + requestCountM = stats.Int64( + requestCountName, + "The number of requests that are routed to webhook", + stats.UnitDimensionless) + responseTimeInMsecM = stats.Float64( + requestLatenciesName, + "The response time in milliseconds", + stats.UnitMilliseconds) + + // Create the tag keys that will be used to add tags to our measurements. + // Tag keys must conform to the restrictions described in + // go.opencensus.io/tag/validate.go. Currently those restrictions are: + // - length between 1 and 255 inclusive + // - characters are printable US-ASCII + requestOperationKey = mustNewTagKey("request_operation") + kindGroupKey = mustNewTagKey("kind_group") + kindVersionKey = mustNewTagKey("kind_version") + kindKindKey = mustNewTagKey("kind_kind") + resourceGroupKey = mustNewTagKey("resource_group") + resourceVersionKey = mustNewTagKey("resource_version") + resourceResourceKey = mustNewTagKey("resource_resource") + resourceNameKey = mustNewTagKey("resource_name") + resourceNamespaceKey = mustNewTagKey("resource_namespace") + admissionAllowedKey = mustNewTagKey("admission_allowed") +) + +func init() { + register() +} + +// StatsReporter reports webhook metrics +type StatsReporter interface { + ReportRequest(request *admissionv1beta1.AdmissionRequest, response *admissionv1beta1.AdmissionResponse, d time.Duration) error +} + +// reporter implements StatsReporter interface +type reporter struct { + ctx context.Context +} + +// NewStatsReporter creaters a reporter for webhook metrics +func NewStatsReporter() (StatsReporter, error) { + ctx, err := tag.New( + context.Background(), + ) + if err != nil { + return nil, err + } + + return &reporter{ctx: ctx}, nil +} + +// Captures req count metric, recording the count and the duration +func (r *reporter) ReportRequest(req *admissionv1beta1.AdmissionRequest, resp *admissionv1beta1.AdmissionResponse, d time.Duration) error { + ctx, err := tag.New( + r.ctx, + tag.Insert(requestOperationKey, string(req.Operation)), + tag.Insert(kindGroupKey, req.Kind.Group), + tag.Insert(kindVersionKey, req.Kind.Version), + tag.Insert(kindKindKey, req.Kind.Kind), + tag.Insert(resourceGroupKey, req.Resource.Group), + tag.Insert(resourceVersionKey, req.Resource.Version), + tag.Insert(resourceResourceKey, req.Resource.Resource), + tag.Insert(resourceNameKey, req.Name), + tag.Insert(resourceNamespaceKey, req.Namespace), + tag.Insert(admissionAllowedKey, strconv.FormatBool(resp.Allowed)), + ) + if err != nil { + return err + } + + metrics.Record(ctx, requestCountM.M(1)) + // Convert time.Duration in nanoseconds to milliseconds + metrics.Record(ctx, responseTimeInMsecM.M(float64(d/time.Millisecond))) + return nil +} + +func register() { + tagKeys := []tag.Key{ + requestOperationKey, + kindGroupKey, + kindVersionKey, + kindKindKey, + resourceGroupKey, + resourceVersionKey, + resourceResourceKey, + resourceNamespaceKey, + resourceNameKey, + admissionAllowedKey} + + if err := view.Register( + &view.View{ + Description: requestCountM.Description(), + Measure: requestCountM, + Aggregation: view.Count(), + TagKeys: tagKeys, + }, + &view.View{ + Description: responseTimeInMsecM.Description(), + Measure: responseTimeInMsecM, + Aggregation: view.Distribution(metrics.Buckets125(1, 100000)...), // [1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000 50000 100000]ms + TagKeys: tagKeys, + }, + ); err != nil { + panic(err) + } +} + +func mustNewTagKey(s string) tag.Key { + tagKey, err := tag.NewKey(s) + if err != nil { + panic(err) + } + return tagKey +} diff --git a/webhook/stats_reporter_test.go b/webhook/stats_reporter_test.go new file mode 100644 index 0000000000..a4a309d685 --- /dev/null +++ b/webhook/stats_reporter_test.go @@ -0,0 +1,76 @@ +/* +Copyright 2019 The Knative Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package webhook + +import ( + "strconv" + "testing" + "time" + + admissionv1beta1 "k8s.io/api/admission/v1beta1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "knative.dev/pkg/metrics/metricstest" +) + +func TestWebhookStatsReporter(t *testing.T) { + setup() + req := &admissionv1beta1.AdmissionRequest{ + UID: "705ab4f5-6393-11e8-b7cc-42010a800002", + Kind: metav1.GroupVersionKind{Group: "autoscaling", Version: "v1", Kind: "Scale"}, + Resource: metav1.GroupVersionResource{Group: "apps", Version: "v1", Resource: "deployments"}, + Name: "my-deployment", + Namespace: "my-namespace", + Operation: admissionv1beta1.Update, + } + + resp := &admissionv1beta1.AdmissionResponse{ + UID: req.UID, + Allowed: true, + } + + r, _ := NewStatsReporter() + + shortTime, longTime := 1100.0, 9100.0 + expectedTags := map[string]string{ + requestOperationKey.Name(): string(req.Operation), + kindGroupKey.Name(): req.Kind.Group, + kindVersionKey.Name(): req.Kind.Version, + kindKindKey.Name(): req.Kind.Kind, + resourceGroupKey.Name(): req.Resource.Group, + resourceVersionKey.Name(): req.Resource.Version, + resourceResourceKey.Name(): req.Resource.Resource, + resourceNameKey.Name(): req.Name, + resourceNamespaceKey.Name(): req.Namespace, + admissionAllowedKey.Name(): strconv.FormatBool(resp.Allowed), + } + + r.ReportRequest(req, resp, time.Duration(shortTime)*time.Millisecond) + r.ReportRequest(req, resp, time.Duration(longTime)*time.Millisecond) + + metricstest.CheckCountData(t, requestCountName, expectedTags, 2) + metricstest.CheckDistributionData(t, requestLatenciesName, expectedTags, 2, shortTime, longTime) +} + +func setup() { + resetMetrics() +} + +// opencensus metrics carry global state that need to be reset between unit tests +func resetMetrics() { + metricstest.Unregister(requestCountName, requestLatenciesName) + register() +} diff --git a/webhook/webhook.go b/webhook/webhook.go index 7f23b47edc..08ff6b9027 100644 --- a/webhook/webhook.go +++ b/webhook/webhook.go @@ -114,10 +114,11 @@ type ResourceDefaulter func(patches *[]jsonpatch.JsonPatchOperation, crd Generic // AdmissionController implements the external admission webhook for validation of // pilot configuration. type AdmissionController struct { - Client kubernetes.Interface - Options ControllerOptions - Handlers map[schema.GroupVersionKind]GenericCRD - Logger *zap.SugaredLogger + Client kubernetes.Interface + Options ControllerOptions + Handlers map[schema.GroupVersionKind]GenericCRD + Logger *zap.SugaredLogger + StatsReporter StatsReporter WithContext func(context.Context) context.Context DisallowUnknownFields bool @@ -408,6 +409,7 @@ func (ac *AdmissionController) register( // ServeHTTP implements the external admission webhook for mutating // serving resources. func (ac *AdmissionController) ServeHTTP(w http.ResponseWriter, r *http.Request) { + var ttStart = time.Now() logger := ac.Logger logger.Infof("Webhook ServeHTTP request=%#v", r) @@ -452,6 +454,9 @@ func (ac *AdmissionController) ServeHTTP(w http.ResponseWriter, r *http.Request) http.Error(w, fmt.Sprintf("could encode response: %v", err), http.StatusInternalServerError) return } + + // Only report valid requests + ac.StatsReporter.ReportRequest(review.Request, response.Response, time.Since(ttStart)) } func makeErrorStatus(reason string, args ...interface{}) *admissionv1beta1.AdmissionResponse { diff --git a/webhook/webhook_integration_test.go b/webhook/webhook_integration_test.go index 262b59abc3..b3fac8598d 100644 --- a/webhook/webhook_integration_test.go +++ b/webhook/webhook_integration_test.go @@ -32,6 +32,7 @@ import ( admissionv1beta1 "k8s.io/api/admission/v1beta1" authenticationv1 "k8s.io/api/authentication/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "knative.dev/pkg/metrics/metricstest" . "knative.dev/pkg/testing" ) @@ -85,6 +86,9 @@ func TestMissingContentType(t *testing.T) { if !strings.Contains(string(responseBody), "invalid Content-Type") { t.Errorf("Response body to contain 'invalid Content-Type' , got = '%s'", string(responseBody)) } + + // Stats are not reported for internal server errors + metricstest.CheckStatsNotReported(t, requestCountName, requestLatenciesName) } func TestEmptyRequestBody(t *testing.T) { @@ -218,6 +222,8 @@ func TestValidResponseForResource(t *testing.T) { if err != nil { t.Fatalf("Failed to decode response: %v", err) } + + metricstest.CheckStatsReported(t, requestCountName, requestLatenciesName) } func TestValidResponseForResourceWithContextDefault(t *testing.T) { @@ -421,6 +427,9 @@ func TestInvalidResponseForResource(t *testing.T) { if !strings.Contains(reviewResponse.Response.Result.Message, "spec.fieldWithValidation") { t.Errorf("Received unexpected response status message %s", reviewResponse.Response.Result.Message) } + + // Stats should be reported for requests that have admission disallowed + metricstest.CheckStatsReported(t, requestCountName, requestLatenciesName) } func TestWebhookClientAuth(t *testing.T) { @@ -468,6 +477,7 @@ func testSetup(t *testing.T) (*AdmissionController, string, error) { } createDeployment(ac) + resetMetrics() return ac, fmt.Sprintf("0.0.0.0:%d", port), nil } diff --git a/webhook/webhook_test.go b/webhook/webhook_test.go index ac5f26b7fc..9eb5824728 100644 --- a/webhook/webhook_test.go +++ b/webhook/webhook_test.go @@ -73,8 +73,12 @@ func newNonRunningTestAdmissionController(t *testing.T, options ControllerOption t.Helper() // Create fake clients kubeClient = fakekubeclientset.NewSimpleClientset() + statsReporter, srErr := NewStatsReporter() + if srErr != nil { + t.Fatalf("Failed to create new stats reporter: %v", srErr) + } - ac, err := NewAdmissionController(kubeClient, options, TestLogger(t)) + ac, err := NewAdmissionController(kubeClient, options, TestLogger(t), &statsReporter) if err != nil { t.Fatalf("Failed to create new admission controller: %v", err) } @@ -795,7 +799,7 @@ func setUserAnnotation(userC, userU string) jsonpatch.JsonPatchOperation { } func NewAdmissionController(client kubernetes.Interface, options ControllerOptions, - logger *zap.SugaredLogger) (*AdmissionController, error) { + logger *zap.SugaredLogger, statsReporter *StatsReporter) (*AdmissionController, error) { return &AdmissionController{ Client: client, Options: options, @@ -823,6 +827,7 @@ func NewAdmissionController(client kubernetes.Interface, options ControllerOptio Kind: "InnerDefaultResource", }: &InnerDefaultResource{}, }, - Logger: logger, + Logger: logger, + StatsReporter: *statsReporter, }, nil }