From 6eb91ab0d426bc314dac9a9e807e493b40234cab Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Thu, 23 Sep 2021 16:31:04 -0700 Subject: [PATCH 1/7] pkg/clusterconditions: Add a new package for cluster conditions Separated from the rest of the CVO stuff, because the insights folks might want to use this too [1]. [1]: https://github.com/openshift/enhancements/pull/837 --- pkg/clusterconditions/always/always.go | 36 ++++ pkg/clusterconditions/clusterconditions.go | 83 ++++++++ .../clusterconditions_test.go | 199 ++++++++++++++++++ pkg/clusterconditions/promql/promql.go | 42 ++++ 4 files changed, 360 insertions(+) create mode 100644 pkg/clusterconditions/always/always.go create mode 100644 pkg/clusterconditions/clusterconditions.go create mode 100644 pkg/clusterconditions/clusterconditions_test.go create mode 100644 pkg/clusterconditions/promql/promql.go diff --git a/pkg/clusterconditions/always/always.go b/pkg/clusterconditions/always/always.go new file mode 100644 index 0000000000..6267761eb7 --- /dev/null +++ b/pkg/clusterconditions/always/always.go @@ -0,0 +1,36 @@ +// Package always implements a cluster condition that always matches. +// +// https://github.com/openshift/enhancements/blob/master/enhancements/update/targeted-update-edge-blocking.md#always +package always + +import ( + "context" + "errors" + + configv1 "github.com/openshift/api/config/v1" + "github.com/openshift/cluster-version-operator/pkg/clusterconditions" +) + +// Always implements a cluster condition that always matches. +type Always struct{} + +var always = &Always{} + +// Valid returns an error if the condition contains any properties +// besides 'type'. +func (a *Always) Valid(ctx context.Context, condition *configv1.ClusterCondition) error { + if condition.PromQL != nil { + return errors.New("the 'promql' property is not valid for 'type: Always' conditions") + } + + return nil +} + +// Match always returns true. 
+func (a *Always) Match(ctx context.Context, condition *configv1.ClusterCondition) (bool, error) { + return true, nil +} + +func init() { + clusterconditions.Register("Always", always) +} diff --git a/pkg/clusterconditions/clusterconditions.go b/pkg/clusterconditions/clusterconditions.go new file mode 100644 index 0000000000..4581ff052d --- /dev/null +++ b/pkg/clusterconditions/clusterconditions.go @@ -0,0 +1,83 @@ +// Package clusterconditions implements cluster conditions for +// identifying matching clusters. +// +// https://github.com/openshift/enhancements/blob/master/enhancements/update/targeted-update-edge-blocking.md#cluster-condition-type-registry +package clusterconditions + +import ( + "context" + "fmt" + + configv1 "github.com/openshift/api/config/v1" + "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/klog/v2" +) + +type Condition interface { + // Valid returns an error if the condition is expected to be + // rejected by the Kubernetes API server. For example, for + // missing or invalid data. + Valid(ctx context.Context, condition *configv1.ClusterCondition) error + + // Match returns whether the condition matches the current + // cluster (true), does not match the current cluster (false), + // or fails to evaluate (error). + Match(ctx context.Context, condition *configv1.ClusterCondition) (bool, error) +} + +// Registry is a registry of implemented condition types. +var Registry map[string]Condition + +// Register registers a condition type, and panics on any name collisions. +func Register(conditionType string, condition Condition) { + if Registry == nil { + Registry = make(map[string]Condition, 1) + } + if existing, ok := Registry[conditionType]; ok && condition != existing { + panic(fmt.Sprintf("cluster condition %q already registered", conditionType)) + } + Registry[conditionType] = condition +} + +// PruneInvalid returns a new slice with recognized, valid conditions. +// The error complains about any unrecognized or invalid conditions. 
+func PruneInvalid(ctx context.Context, matchingRules []configv1.ClusterCondition) ([]configv1.ClusterCondition, error) { + var valid []configv1.ClusterCondition + var errs []error + + for _, config := range matchingRules { + condition, ok := Registry[config.Type] + if !ok { + errs = append(errs, fmt.Errorf("Skipping unrecognized cluster condition type %q", config.Type)) + continue + } + if err := condition.Valid(ctx, &config); err != nil { + errs = append(errs, err) + continue + } + valid = append(valid, config) + } + + return valid, errors.NewAggregate(errs) +} + +// Match returns whether the cluster matches the given rules (true), +// does not match (false), or the rules fail to evaluate (error). +func Match(ctx context.Context, matchingRules []configv1.ClusterCondition) (bool, error) { + var errs []error + + for _, config := range matchingRules { + condition, ok := Registry[config.Type] + if !ok { + klog.V(4).Infof("Skipping unrecognized cluster condition type %q", config.Type) + continue + } + match, err := condition.Match(ctx, &config) + if err == nil { + return match, nil + } + errs = append(errs, err) + } + + return false, errors.NewAggregate(errs) +} diff --git a/pkg/clusterconditions/clusterconditions_test.go b/pkg/clusterconditions/clusterconditions_test.go new file mode 100644 index 0000000000..4acb9b0659 --- /dev/null +++ b/pkg/clusterconditions/clusterconditions_test.go @@ -0,0 +1,199 @@ +package clusterconditions_test + +import ( + "context" + "fmt" + "reflect" + "regexp" + "testing" + + configv1 "github.com/openshift/api/config/v1" + + "github.com/openshift/cluster-version-operator/pkg/clusterconditions" + _ "github.com/openshift/cluster-version-operator/pkg/clusterconditions/always" + _ "github.com/openshift/cluster-version-operator/pkg/clusterconditions/promql" +) + +// Error implements a cluster condition that always errors. 
+type Error struct { + count int +} + +// Valid always returns 'nil', because we are not using this type to +// exercise validation. +func (e *Error) Valid(ctx context.Context, condition *configv1.ClusterCondition) error { + return nil +} + +// Match always returns an error. +func (e *Error) Match(ctx context.Context, condition *configv1.ClusterCondition) (bool, error) { + e.count += 1 + return false, fmt.Errorf("test error %d", e.count) +} + +func TestPruneInvalid(t *testing.T) { + ctx := context.Background() + + for _, testCase := range []struct { + name string + conditions []configv1.ClusterCondition + expectedValid []configv1.ClusterCondition + expectedError *regexp.Regexp + }{ + { + name: "no conditions", + }, + { + name: "valid conditions", + conditions: []configv1.ClusterCondition{ + { + Type: "Always", + }, + { + Type: "PromQL", + PromQL: &configv1.PromQLClusterCondition{ + PromQL: "max(cluster_proxy_enabled{type=~\"https?\"})", + }, + }, + }, + expectedValid: []configv1.ClusterCondition{ + { + Type: "Always", + }, + { + Type: "PromQL", + PromQL: &configv1.PromQLClusterCondition{ + PromQL: "max(cluster_proxy_enabled{type=~\"https?\"})", + }, + }, + }, + }, + { + name: "some invalid conditions", + conditions: []configv1.ClusterCondition{ + { + Type: "Always", + }, + { + Type: "PromQL", + }, + }, + expectedValid: []configv1.ClusterCondition{ + { + Type: "Always", + }, + }, + expectedError: regexp.MustCompile("^the 'promql' property is required for 'type: PromQL' conditions$"), + }, + { + name: "all invalid", + conditions: []configv1.ClusterCondition{ + { + Type: "Always", + PromQL: &configv1.PromQLClusterCondition{}, + }, + { + Type: "PromQL", + }, + }, + expectedError: regexp.MustCompile("^[[]the 'promql' property is not valid for 'type: Always' conditions, the 'promql' property is required for 'type: PromQL' conditions]$"), + }, + } { + t.Run(testCase.name, func(t *testing.T) { + valid, err := clusterconditions.PruneInvalid(ctx, testCase.conditions) + if 
!reflect.DeepEqual(valid, testCase.expectedValid) { + t.Errorf("got valid %v but expected %v", valid, testCase.expectedValid) + } + if err != nil && testCase.expectedError == nil { + t.Errorf("unexpected error: %v", err) + } else if testCase.expectedError != nil && err == nil { + t.Errorf("unexpected success, expected: %s", testCase.expectedError) + } else if testCase.expectedError != nil && !testCase.expectedError.MatchString(err.Error()) { + t.Errorf("expected error %s, not: %v", testCase.expectedError, err) + } + }) + } +} + +func TestMatch(t *testing.T) { + ctx := context.Background() + clusterconditions.Register("Error", &Error{}) + + for _, testCase := range []struct { + name string + conditions []configv1.ClusterCondition + expectedMatch bool + expectedError *regexp.Regexp + }{ + { + name: "no conditions", + expectedMatch: false, + }, + { + name: "valid condition before unrecognized condition", + conditions: []configv1.ClusterCondition{ + { + Type: "Always", + }, + { + Type: "does-not-exist", + }, + }, + expectedMatch: true, + }, + { + name: "valid condition after unrecognized condition", + conditions: []configv1.ClusterCondition{ + { + Type: "does-not-exist", + }, + { + Type: "Always", + }, + }, + expectedMatch: true, + }, + { + name: "all unrecognized", + conditions: []configv1.ClusterCondition{ + { + Type: "does-not-exist-1", + }, + { + Type: "does-not-exist-2", + }, + }, + }, + { + name: "unrecognized and two errors", + conditions: []configv1.ClusterCondition{ + { + Type: "does-not-exist", + }, + { + Type: "Error", + }, + { + Type: "Error", + }, + }, + expectedError: regexp.MustCompile("^[[]test error 1, test error 2]"), + }, + } { + t.Run(testCase.name, func(t *testing.T) { + match, err := clusterconditions.Match(ctx, testCase.conditions) + if match != testCase.expectedMatch { + t.Errorf("got match %t but expected %t", match, testCase.expectedMatch) + } + if err != nil && testCase.expectedError == nil { + t.Errorf("unexpected error: %v", err) + } else 
if testCase.expectedError != nil && err == nil { + t.Errorf("unexpected success, expected: %s", testCase.expectedError) + } else if testCase.expectedError != nil && !testCase.expectedError.MatchString(err.Error()) { + t.Errorf("expected error %s, not: %v", testCase.expectedError, err) + } + }) + } + + delete(clusterconditions.Registry, "Error") +} diff --git a/pkg/clusterconditions/promql/promql.go b/pkg/clusterconditions/promql/promql.go new file mode 100644 index 0000000000..3600a42251 --- /dev/null +++ b/pkg/clusterconditions/promql/promql.go @@ -0,0 +1,42 @@ +// Package promql +// +// https://github.com/openshift/enhancements/blob/master/enhancements/update/targeted-update-edge-blocking.md#promql +package promql + +import ( + "context" + "errors" + + configv1 "github.com/openshift/api/config/v1" + "github.com/openshift/cluster-version-operator/pkg/clusterconditions" +) + +// PromQL implements a cluster condition that matches based on PromQL. +type PromQL struct{} + +var promql = &PromQL{} + +// Valid returns an error if the condition contains any properties +// besides 'type' and a valid `promql`. +func (p *PromQL) Valid(ctx context.Context, condition *configv1.ClusterCondition) error { + if condition.PromQL == nil { + return errors.New("the 'promql' property is required for 'type: PromQL' conditions") + } + + if condition.PromQL.PromQL == "" { + return errors.New("the 'promql.promql' query string must be non-empty for 'type: PromQL' conditions") + } + + return nil +} + +// Match returns true when the condition's PromQL evaluates to 1, +// false when the PromQL evaluates to 0, and an error if the PromQL +// returns no time series or returns a value besides 0 or 1. 
+func (p *PromQL) Match(ctx context.Context, condition *configv1.ClusterCondition) (bool, error) { + return false, errors.New("not yet implemented: PromQL matching") +} + +func init() { + clusterconditions.Register("PromQL", promql) +} From 8263002ad84f564a306f46ebc6a2b01bcb06e92e Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Mon, 27 Sep 2021 12:39:23 -0700 Subject: [PATCH 2/7] pkg/cincinnati: Capture and return conditional updates Shifting the Cincinnati -> ClusterVersion schema translation over into the Cincinnati package, because I don't want to have to maintain two separate, public Go structure sets. This captures the extended Cincinnati JSON described in the enhancement [1], but does not yet poll matching rules, manage conditionalEdges[].conditions, or promote recommended conditional edges into availableUpdates. [1]: https://github.com/openshift/enhancements/blob/2cc2d9b331532c852878a7c793f3a754914c824e/enhancements/update/targeted-update-edge-blocking.md --- cmd/main.go | 3 + pkg/cincinnati/cincinnati.go | 166 +++++++-- pkg/cincinnati/cincinnati_test.go | 548 ++++++++++++++++++++++++++---- pkg/cvo/availableupdates.go | 86 ++--- 4 files changed, 660 insertions(+), 143 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 1414798925..a11b054c30 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -5,6 +5,9 @@ import ( "github.com/spf13/cobra" "k8s.io/klog/v2" + + _ "github.com/openshift/cluster-version-operator/pkg/clusterconditions/always" + _ "github.com/openshift/cluster-version-operator/pkg/clusterconditions/promql" ) var ( diff --git a/pkg/cincinnati/cincinnati.go b/pkg/cincinnati/cincinnati.go index 07f97a367c..0e735f78ec 100644 --- a/pkg/cincinnati/cincinnati.go +++ b/pkg/cincinnati/cincinnati.go @@ -7,10 +7,14 @@ import ( "io/ioutil" "net/http" "net/url" + "sort" + "strings" "time" "github.com/blang/semver/v4" "github.com/google/uuid" + configv1 "github.com/openshift/api/config/v1" + 
"github.com/openshift/cluster-version-operator/pkg/clusterconditions" "k8s.io/klog/v2" ) @@ -35,9 +39,6 @@ func NewClient(id uuid.UUID, transport *http.Transport) Client { return Client{id: id, transport: transport} } -// Update is a single node from the update graph. -type Update node - // Error is returned when are unable to get updates. type Error struct { // Reason is the reason suggested for the ClusterOperator status condition. @@ -56,14 +57,17 @@ func (err *Error) Error() string { } // GetUpdates fetches the current and next-applicable update payloads from the specified -// upstream Cincinnati stack given the current version and channel. The next- -// applicable updates are determined by downloading the update graph, finding -// the current version within that graph (typically the root node), and then -// finding all of the children. These children are the available updates for -// the current version and their payloads indicate from where the actual update -// image can be downloaded. -func (c Client) GetUpdates(ctx context.Context, uri *url.URL, arch string, channel string, version semver.Version) (Update, []Update, error) { - var current Update +// upstream Cincinnati stack given the current version and channel. The command: +// +// 1. Downloads the update graph from the requested URI for the requested arch and channel. +// 2. Finds the current version entry under .nodes. +// 3. Finds recommended next-hop updates by searching .edges for updates from the current +// version. Returns a slice of target Releases with these unconditional recommendations. +// 4. Finds conditionally recommended next-hop updates by searching .conditionalEdges for +// updates from the current version. Returns a slice of ConditionalUpdates with these +// conditional recommendations. 
+func (c Client) GetUpdates(ctx context.Context, uri *url.URL, arch string, channel string, version semver.Version) (configv1.Release, []configv1.Release, []configv1.ConditionalUpdate, error) { + var current configv1.Release // Prepare parametrized cincinnati query. queryParams := uri.Query() queryParams.Add("arch", arch) @@ -75,7 +79,7 @@ func (c Client) GetUpdates(ctx context.Context, uri *url.URL, arch string, chann // Download the update graph. req, err := http.NewRequest("GET", uri.String(), nil) if err != nil { - return current, nil, &Error{Reason: "InvalidRequest", Message: err.Error(), cause: err} + return current, nil, nil, &Error{Reason: "InvalidRequest", Message: err.Error(), cause: err} } req.Header.Add("Accept", GraphMediaType) if c.transport != nil && c.transport.TLSClientConfig != nil { @@ -101,23 +105,23 @@ func (c Client) GetUpdates(ctx context.Context, uri *url.URL, arch string, chann defer cancel() resp, err := client.Do(req.WithContext(timeoutCtx)) if err != nil { - return current, nil, &Error{Reason: "RemoteFailed", Message: err.Error(), cause: err} + return current, nil, nil, &Error{Reason: "RemoteFailed", Message: err.Error(), cause: err} } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return current, nil, &Error{Reason: "ResponseFailed", Message: fmt.Sprintf("unexpected HTTP status: %s", resp.Status)} + return current, nil, nil, &Error{Reason: "ResponseFailed", Message: fmt.Sprintf("unexpected HTTP status: %s", resp.Status)} } // Parse the graph. 
body, err := ioutil.ReadAll(resp.Body) if err != nil { - return current, nil, &Error{Reason: "ResponseFailed", Message: err.Error(), cause: err} + return current, nil, nil, &Error{Reason: "ResponseFailed", Message: err.Error(), cause: err} } var graph graph if err = json.Unmarshal(body, &graph); err != nil { - return current, nil, &Error{Reason: "ResponseInvalid", Message: err.Error(), cause: err} + return current, nil, nil, &Error{Reason: "ResponseInvalid", Message: err.Error(), cause: err} } // Find the current version within the graph. @@ -126,13 +130,19 @@ func (c Client) GetUpdates(ctx context.Context, uri *url.URL, arch string, chann for i, node := range graph.Nodes { if version.EQ(node.Version) { currentIdx = i - current = Update(graph.Nodes[i]) found = true + current, err = convertRetrievedUpdateToRelease(graph.Nodes[i]) + if err != nil { + return current, nil, nil, &Error{ + Reason: "ResponseInvalid", + Message: fmt.Sprintf("invalid current node: %s", err), + } + } break } } if !found { - return current, nil, &Error{ + return current, nil, nil, &Error{ Reason: "VersionNotFound", Message: fmt.Sprintf("currently reconciling cluster version %s not found in the %q channel", version, channel), } @@ -146,17 +156,98 @@ func (c Client) GetUpdates(ctx context.Context, uri *url.URL, arch string, chann } } - var updates []Update + var updates []configv1.Release for _, i := range nextIdxs { - updates = append(updates, Update(graph.Nodes[i])) + update, err := convertRetrievedUpdateToRelease(graph.Nodes[i]) + if err != nil { + return current, nil, nil, &Error{ + Reason: "ResponseInvalid", + Message: fmt.Sprintf("invalid recommended update node: %s", err), + } + } + updates = append(updates, update) + } + + var conditionalUpdates []configv1.ConditionalUpdate + for _, conditionalEdges := range graph.ConditionalEdges { + for _, edge := range conditionalEdges.Edges { + if version.String() == edge.From { + var target *node + for i, node := range graph.Nodes { + if 
node.Version.String() == edge.To { + target = &graph.Nodes[i] + break + } + } + if target == nil { + return current, updates, nil, &Error{ + Reason: "ResponseInvalid", + Message: fmt.Sprintf("no node for conditional update %s", edge.To), + } + } + update, err := convertRetrievedUpdateToRelease(*target) + if err != nil { + return current, updates, nil, &Error{ + Reason: "ResponseInvalid", + Message: fmt.Sprintf("invalid conditional update node: %s", err), + } + } + conditionalUpdates = append(conditionalUpdates, configv1.ConditionalUpdate{ + Release: update, + Risks: conditionalEdges.Risks, + }) + } + } + } + + for i := len(updates) - 1; i >= 0; i-- { + for _, conditionalUpdate := range conditionalUpdates { + if conditionalUpdate.Release.Image == updates[i].Image { + klog.Warningf("Update to %s listed as both a conditional and unconditional update; preferring the conditional update.", conditionalUpdate.Release.Version) + updates = append(updates[:i], updates[i+1:]...) + } + } + } + + if len(updates) == 0 { + updates = nil + } + + for i := len(conditionalUpdates) - 1; i >= 0; i-- { + for j, risk := range conditionalUpdates[i].Risks { + conditionalUpdates[i].Risks[j].MatchingRules, err = clusterconditions.PruneInvalid(ctx, risk.MatchingRules) + if len(conditionalUpdates[i].Risks[j].MatchingRules) == 0 { + klog.Warningf("Conditional update to %s, risk %q, has empty pruned matchingRules; dropping this target to avoid rejections when pushing to the Kubernetes API server. Pruning results: %s", conditionalUpdates[i].Release.Version, risk.Name, err) + conditionalUpdates = append(conditionalUpdates[:i], conditionalUpdates[i+1:]...) 
+ } else if err != nil { + klog.Warningf("Conditional update to %s, risk %q, has pruned matchingRules (although other valid, recognized matchingRules were given, and are sufficient to keep the conditional update): %s", conditionalUpdates[i].Release.Version, risk.Name, err) + } + } + } + + targets := make(map[string]int, len(conditionalUpdates)) + for _, conditionalUpdate := range conditionalUpdates { + targets[conditionalUpdate.Release.Image]++ } - return current, updates, nil + for i := len(conditionalUpdates) - 1; i >= 0; i-- { + if targets[conditionalUpdates[i].Release.Image] > 1 { + klog.Warningf("Upstream declares %d conditional updates to %s; dropping them all.", targets[conditionalUpdates[i].Release.Image], conditionalUpdates[i].Release.Version) + conditionalUpdates = append(conditionalUpdates[:i], conditionalUpdates[i+1:]...) + } + } + + if len(conditionalUpdates) == 0 { + conditionalUpdates = nil + } + + return current, updates, conditionalUpdates, nil } type graph struct { - Nodes []node - Edges []edge + Nodes []node + Edges []edge + ConditionalEdges []conditionalEdges `json:"conditionalEdges"` } type node struct { @@ -170,6 +261,16 @@ type edge struct { Destination int } +type conditionalEdge struct { + From string `json:"from"` + To string `json:"to"` +} + +type conditionalEdges struct { + Edges []conditionalEdge `json:"edges"` + Risks []configv1.ConditionalUpdateRisk `json:"risks"` +} + // UnmarshalJSON unmarshals an edge in the update graph. The edge's JSON // representation is a two-element array of indices, but Go's representation is // a struct with two elements so this custom unmarshal method is required. 
@@ -188,3 +289,22 @@ func (e *edge) UnmarshalJSON(data []byte) error { return nil } + +func convertRetrievedUpdateToRelease(update node) (configv1.Release, error) { + cvoUpdate := configv1.Release{ + Version: update.Version.String(), + Image: update.Image, + } + if urlString, ok := update.Metadata["url"]; ok { + _, err := url.Parse(urlString) + if err != nil { + return cvoUpdate, fmt.Errorf("invalid URL for %s: %s", cvoUpdate.Version, err) + } + cvoUpdate.URL = configv1.URL(urlString) + } + if channels, ok := update.Metadata["io.openshift.upgrades.graph.release.channels"]; ok { + cvoUpdate.Channels = strings.Split(channels, ",") + sort.Strings(cvoUpdate.Channels) + } + return cvoUpdate, nil +} diff --git a/pkg/cincinnati/cincinnati_test.go b/pkg/cincinnati/cincinnati_test.go index 35c4f813e2..c46b31bd99 100644 --- a/pkg/cincinnati/cincinnati_test.go +++ b/pkg/cincinnati/cincinnati_test.go @@ -12,6 +12,9 @@ import ( "github.com/blang/semver/v4" "github.com/google/uuid" + configv1 "github.com/openshift/api/config/v1" + _ "github.com/openshift/cluster-version-operator/pkg/clusterconditions/always" + _ "github.com/openshift/cluster-version-operator/pkg/clusterconditions/promql" _ "k8s.io/klog/v2" // integration tests set glog flags. 
) @@ -23,37 +26,493 @@ func TestGetUpdates(t *testing.T) { name string version string - expectedQuery string - current Update - available []Update - err string + expectedQuery string + graph string + current configv1.Release + available []configv1.Release + conditionalUpdates []configv1.ConditionalUpdate + err string }{{ - name: "one update available", - version: "4.0.0-4", - expectedQuery: "arch=test-arch&channel=test-channel&id=01234567-0123-0123-0123-0123456789ab&version=4.0.0-4", - current: Update{Version: semver.MustParse("4.0.0-4"), Image: "quay.io/openshift-release-dev/ocp-release:4.0.0-4"}, - available: []Update{ - {Version: semver.MustParse("4.0.0-5"), Image: "quay.io/openshift-release-dev/ocp-release:4.0.0-5"}, + name: "one update available", + version: "4.1.0", + graph: `{ + "nodes": [ + { + "version": "4.1.0", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.0", + "metadata": { + "url": "https://example.com/errata/4.1.0", + "io.openshift.upgrades.graph.release.channels": "test-channel,channel-a" + } + }, + { + "version": "4.1.1", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.1", + "metadata": { + "url": "https://example.com/errata/4.1.1", + "io.openshift.upgrades.graph.release.channels": "test-channel" + } + } + ], + "edges": [[0,1]] +}`, + expectedQuery: "arch=test-arch&channel=test-channel&id=01234567-0123-0123-0123-0123456789ab&version=4.1.0", + current: configv1.Release{ + Version: "4.1.0", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.0", + URL: "https://example.com/errata/4.1.0", + Channels: []string{"channel-a", "test-channel"}, + }, + available: []configv1.Release{ + { + Version: "4.1.1", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.1", + URL: "https://example.com/errata/4.1.1", + Channels: []string{"test-channel"}, + }, + }, + }, { + name: "two updates available", + version: "4.1.0", + graph: `{ + "nodes": [ + { + "version": "4.1.0", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.0", + 
"metadata": { + "url": "https://example.com/errata/4.1.0", + "io.openshift.upgrades.graph.release.channels": "test-channel,channel-a" + } + }, + { + "version": "4.1.1", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.1", + "metadata": { + "url": "https://example.com/errata/4.1.1", + "io.openshift.upgrades.graph.release.channels": "test-channel,channel-a" + } + }, + { + "version": "4.1.2", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.2", + "metadata": { + "url": "https://example.com/errata/4.1.2", + "io.openshift.upgrades.graph.release.channels": "test-channel" + } + }, + { + "version": "4.1.3", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.3", + "metadata": { + "url": "https://example.com/errata/4.1.3", + "io.openshift.upgrades.graph.release.channels": "test-channel" + } + } + ], + "edges": [[0,1], [0,2], [1,2], [2,3]] +}`, + expectedQuery: "arch=test-arch&channel=test-channel&id=01234567-0123-0123-0123-0123456789ab&version=4.1.0", + current: configv1.Release{ + Version: "4.1.0", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.0", + URL: "https://example.com/errata/4.1.0", + Channels: []string{"channel-a", "test-channel"}, + }, + available: []configv1.Release{ + { + Version: "4.1.1", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.1", + URL: "https://example.com/errata/4.1.1", + Channels: []string{"channel-a", "test-channel"}, + }, + { + Version: "4.1.2", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.2", + URL: "https://example.com/errata/4.1.2", + Channels: []string{"test-channel"}, + }, + }, + }, { + name: "no updates available", + version: "4.1.0-0.okd-0", + graph: `{ + "nodes": [ + { + "version": "4.1.0-0.okd-0", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.0-0.okd-0", + "metadata": { + "url": "https://example.com/errata/4.1.0-0.okd-0", + "io.openshift.upgrades.graph.release.channels": "test-channel,channel-a" + } + }, + { + "version": "4.1.1", + "payload": 
"quay.io/openshift-release-dev/ocp-release:4.1.1", + "metadata": { + "url": "https://example.com/errata/4.1.1", + "io.openshift.upgrades.graph.release.channels": "test-channel" + } + }, + { + "version": "4.1.2", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.2", + "metadata": { + "url": "https://example.com/errata/4.1.2", + "io.openshift.upgrades.graph.release.channels": "test-channel" + } + } + ], + "edges": [[1,2]] +}`, + expectedQuery: "arch=test-arch&channel=test-channel&id=01234567-0123-0123-0123-0123456789ab&version=4.1.0-0.okd-0", + current: configv1.Release{ + Version: "4.1.0-0.okd-0", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.0-0.okd-0", + URL: "https://example.com/errata/4.1.0-0.okd-0", + Channels: []string{"channel-a", "test-channel"}, + }, + }, { + name: "conditional updates available", + version: "4.1.0", + graph: `{ + "nodes": [ + { + "version": "4.1.0", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.0", + "metadata": { + "url": "https://example.com/errata/4.1.0", + "io.openshift.upgrades.graph.release.channels": "test-channel,channel-a" + } + }, + { + "version": "4.1.1", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.1", + "metadata": { + "url": "https://example.com/errata/4.1.1", + "io.openshift.upgrades.graph.release.channels": "test-channel,channel-a" + } + }, + { + "version": "4.1.2", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.2", + "metadata": { + "url": "https://example.com/errata/4.1.2", + "io.openshift.upgrades.graph.release.channels": "test-channel" + } + }, + { + "version": "4.1.3", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.3", + "metadata": { + "url": "https://example.com/errata/4.1.3", + "io.openshift.upgrades.graph.release.channels": "test-channel" + } + } + ], + "edges": [[0,1], [0,2], [1,2], [2,3]], + "conditionalEdges": [ + { + "edges": [{"from": "4.1.0", "to": "4.1.3"}], + "risks": [ + { + "url": "https://example.com/bug/123", + "name": "BugA", + 
"message": "On clusters with a Proxy configured, everything breaks.", + "matchingRules": [ + { + "type": "PromQL", + "promql": { + "promql": "max(cluster_proxy_enabled{type=~\"https?\"})" + } + } + ] + }, + { + "url": "https://example.com/bug/456", + "name": "BugB", + "message": "All 4.1.0 clusters are incompatible with 4.1.3, and must pass through 4.1.2 on their way to 4.1.3 to avoid breaking.", + "matchingRules": [ + { + "type": "Always" + } + ] + } + ] + } + ] +}`, + expectedQuery: "arch=test-arch&channel=test-channel&id=01234567-0123-0123-0123-0123456789ab&version=4.1.0", + current: configv1.Release{ + Version: "4.1.0", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.0", + URL: "https://example.com/errata/4.1.0", + Channels: []string{"channel-a", "test-channel"}, + }, + available: []configv1.Release{ + { + Version: "4.1.1", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.1", + URL: "https://example.com/errata/4.1.1", + Channels: []string{"channel-a", "test-channel"}, + }, + { + Version: "4.1.2", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.2", + URL: "https://example.com/errata/4.1.2", + Channels: []string{"test-channel"}, + }, + }, + conditionalUpdates: []configv1.ConditionalUpdate{ + { + Release: configv1.Release{ + Version: "4.1.3", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.3", + URL: "https://example.com/errata/4.1.3", + Channels: []string{"test-channel"}, + }, + Risks: []configv1.ConditionalUpdateRisk{ + { + URL: "https://example.com/bug/123", + Name: "BugA", + Message: "On clusters with a Proxy configured, everything breaks.", + MatchingRules: []configv1.ClusterCondition{ + { + Type: "PromQL", + PromQL: &configv1.PromQLClusterCondition{ + PromQL: "max(cluster_proxy_enabled{type=~\"https?\"})", + }, + }, + }, + }, { + URL: "https://example.com/bug/456", + Name: "BugB", + Message: "All 4.1.0 clusters are incompatible with 4.1.3, and must pass through 4.1.2 on their way to 4.1.3 to avoid breaking.", + MatchingRules: 
[]configv1.ClusterCondition{ + { + Type: "Always", + }, + }, + }, + }, + }, }, }, { - name: "two updates available", - version: "4.0.0-5", - expectedQuery: "arch=test-arch&channel=test-channel&id=01234567-0123-0123-0123-0123456789ab&version=4.0.0-5", - current: Update{Version: semver.MustParse("4.0.0-5"), Image: "quay.io/openshift-release-dev/ocp-release:4.0.0-5"}, - available: []Update{ - {Version: semver.MustParse("4.0.0-6"), Image: "quay.io/openshift-release-dev/ocp-release:4.0.0-6"}, - {Version: semver.MustParse("4.0.0-6+2"), Image: "quay.io/openshift-release-dev/ocp-release:4.0.0-6+2"}, + name: "conditional updates available, but have no recognized rules", + version: "4.1.0", + graph: `{ + "nodes": [ + { + "version": "4.1.0", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.0", + "metadata": { + "url": "https://example.com/errata/4.1.0", + "io.openshift.upgrades.graph.release.channels": "test-channel,channel-a" + } + }, + { + "version": "4.1.1", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.1", + "metadata": { + "url": "https://example.com/errata/4.1.1", + "io.openshift.upgrades.graph.release.channels": "test-channel,channel-a" + } + } + ], + "conditionalEdges": [ + { + "edges": [{"from": "4.1.0", "to": "4.1.1"}], + "risks": [ + { + "url": "https://example.com/bug/123", + "name": "BugA", + "message": "On clusters with a Proxy configured, everything breaks.", + "matchingRules": [ + { + "type": "PromQL", + "promql": { + "promql": "max(cluster_proxy_enabled{type=~\"https?\"})" + } + } + ] + }, + { + "url": "https://example.com/bug/456", + "name": "BugB", + "message": "This risk has no recognized rules, and so the conditional update to 4.1.1 will be dropped to avoid rejections when pushing to the Kubernetes API server.", + "matchingRules": [ + { + "type": "does-not-exist" + } + ] + } + ] + } + ] +}`, + expectedQuery: "arch=test-arch&channel=test-channel&id=01234567-0123-0123-0123-0123456789ab&version=4.1.0", + current: configv1.Release{ + 
Version: "4.1.0", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.0", + URL: "https://example.com/errata/4.1.0", + Channels: []string{"channel-a", "test-channel"}, }, }, { - name: "no updates available", - version: "4.0.0-0.okd-0", - current: Update{Version: semver.MustParse("4.0.0-0.okd-0"), Image: "quay.io/openshift-release-dev/ocp-release:4.0.0-0.okd-0"}, - expectedQuery: "arch=test-arch&channel=test-channel&id=01234567-0123-0123-0123-0123456789ab&version=4.0.0-0.okd-0", + name: "conditional updates available, and overlap with unconditional edge", + version: "4.1.0", + graph: `{ + "nodes": [ + { + "version": "4.1.0", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.0", + "metadata": { + "url": "https://example.com/errata/4.1.0", + "io.openshift.upgrades.graph.release.channels": "test-channel,channel-a" + } + }, + { + "version": "4.1.1", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.1", + "metadata": { + "url": "https://example.com/errata/4.1.1", + "io.openshift.upgrades.graph.release.channels": "test-channel" + } + } + ], + "edges": [[0,1]], + "conditionalEdges": [ + { + "edges": [{"from": "4.1.0", "to": "4.1.1"}], + "risks": [ + { + "url": "https://example.com/bug/123", + "name": "BugA", + "message": "On clusters with a Proxy configured, everything breaks.", + "matchingRules": [ + { + "type": "PromQL", + "promql": { + "promql": "max(cluster_proxy_enabled{type=~\"https?\"})" + } + } + ] + } + ] + } + ] +}`, + expectedQuery: "arch=test-arch&channel=test-channel&id=01234567-0123-0123-0123-0123456789ab&version=4.1.0", + current: configv1.Release{ + Version: "4.1.0", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.0", + URL: "https://example.com/errata/4.1.0", + Channels: []string{"channel-a", "test-channel"}, + }, + conditionalUpdates: []configv1.ConditionalUpdate{ + { + Release: configv1.Release{ + Version: "4.1.1", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.1", + URL: "https://example.com/errata/4.1.1", + 
Channels: []string{"test-channel"}, + }, + Risks: []configv1.ConditionalUpdateRisk{ + { + URL: "https://example.com/bug/123", + Name: "BugA", + Message: "On clusters with a Proxy configured, everything breaks.", + MatchingRules: []configv1.ClusterCondition{ + { + Type: "PromQL", + PromQL: &configv1.PromQLClusterCondition{ + PromQL: "max(cluster_proxy_enabled{type=~\"https?\"})", + }, + }, + }, + }, + }, + }, + }, }, { - name: "unknown version", - version: "4.0.0-3", - expectedQuery: "arch=test-arch&channel=test-channel&id=01234567-0123-0123-0123-0123456789ab&version=4.0.0-3", - err: "VersionNotFound: currently reconciling cluster version 4.0.0-3 not found in the \"test-channel\" channel", + name: "multiple conditional updates with a single target", + version: "4.1.0", + graph: `{ + "nodes": [ + { + "version": "4.1.0", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.0", + "metadata": { + "url": "https://example.com/errata/4.1.0", + "io.openshift.upgrades.graph.release.channels": "test-channel,channel-a" + } + }, + { + "version": "4.1.1", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.1", + "metadata": { + "url": "https://example.com/errata/4.1.1", + "io.openshift.upgrades.graph.release.channels": "test-channel" + } + } + ], + "conditionalEdges": [ + { + "edges": [{"from": "4.1.0", "to": "4.1.1"}], + "risks": [ + { + "url": "https://example.com/bug/123", + "name": "BugA", + "message": "On clusters with a Proxy configured, everything breaks.", + "matchingRules": [ + { + "type": "PromQL", + "promql": { + "promql": "max(cluster_proxy_enabled{type=~\"https?\"})" + } + } + ] + } + ] + }, { + "edges": [{"from": "4.1.0", "to": "4.1.1"}], + "risks": [ + { + "url": "https://example.com/bug/456", + "name": "BugB", + "message": "All 4.1.0 clusters are incompatible with 4.1.3, and must pass through 4.1.2 on their way to 4.1.3 to avoid breaking.", + "matchingRules": [ + { + "type": "Always" + } + ] + } + ] + } + ] +}`, + expectedQuery: 
"arch=test-arch&channel=test-channel&id=01234567-0123-0123-0123-0123456789ab&version=4.1.0", + current: configv1.Release{ + Version: "4.1.0", + Image: "quay.io/openshift-release-dev/ocp-release:4.1.0", + URL: "https://example.com/errata/4.1.0", + Channels: []string{"channel-a", "test-channel"}, + }, + }, { + name: "unknown version", + version: "4.1.0", + graph: `{ + "nodes": [ + { + "version": "4.1.1", + "payload": "quay.io/openshift-release-dev/ocp-release:4.1.1", + "metadata": { + "url": "https://example.com/errata/4.1.1", + "io.openshift.upgrades.graph.release.channels": "test-channel,channel-a" + } + } + ] +}`, + expectedQuery: "arch=test-arch&channel=test-channel&id=01234567-0123-0123-0123-0123456789ab&version=4.1.0", + err: "VersionNotFound: currently reconciling cluster version 4.1.0 not found in the \"test-channel\" channel", }} for _, test := range tests { t.Run(test.name, func(t *testing.T) { @@ -78,39 +537,7 @@ func TestGetUpdates(t *testing.T) { return } - _, err := w.Write([]byte(`{ - "nodes": [ - { - "version": "4.0.0-4", - "payload": "quay.io/openshift-release-dev/ocp-release:4.0.0-4" - }, - { - "version": "4.0.0-5", - "payload": "quay.io/openshift-release-dev/ocp-release:4.0.0-5" - }, - { - "version": "4.0.0-6", - "payload": "quay.io/openshift-release-dev/ocp-release:4.0.0-6" - }, - { - "version": "4.0.0-6+2", - "payload": "quay.io/openshift-release-dev/ocp-release:4.0.0-6+2" - }, - { - "version": "4.0.0-0.okd-0", - "payload": "quay.io/openshift-release-dev/ocp-release:4.0.0-0.okd-0" - }, - { - "version": "4.0.0-0.2", - "payload": "quay.io/openshift-release-dev/ocp-release:4.0.0-0.2" - }, - { - "version": "4.0.0-0.3", - "payload": "quay.io/openshift-release-dev/ocp-release:4.0.0-0.3" - } - ], - "edges": [[0,1],[1,2],[1,3],[5,6]] - }`)) + _, err := w.Write([]byte(test.graph)) if err != nil { w.WriteHeader(http.StatusInternalServerError) return @@ -127,7 +554,7 @@ func TestGetUpdates(t *testing.T) { t.Fatal(err) } - current, updates, err := 
c.GetUpdates(context.Background(), uri, arch, channelName, semver.MustParse(test.version)) + current, updates, conditionalUpdates, err := c.GetUpdates(context.Background(), uri, arch, channelName, semver.MustParse(test.version)) if test.err == "" { if err != nil { t.Fatalf("expected nil error, got: %v", err) @@ -138,6 +565,9 @@ func TestGetUpdates(t *testing.T) { if !reflect.DeepEqual(updates, test.available) { t.Fatalf("expected updates %v, got: %v", test.available, updates) } + if !reflect.DeepEqual(conditionalUpdates, test.conditionalUpdates) { + t.Fatalf("expected conditional updates %v, got: %v", test.conditionalUpdates, conditionalUpdates) + } } else { if err == nil || err.Error() != test.err { t.Fatalf("expected err to be %s, got: %v", test.err, err) diff --git a/pkg/cvo/availableupdates.go b/pkg/cvo/availableupdates.go index 049e99c16f..6b5a543624 100644 --- a/pkg/cvo/availableupdates.go +++ b/pkg/cvo/availableupdates.go @@ -6,8 +6,6 @@ import ( "net/http" "net/url" "runtime" - "sort" - "strings" "time" "github.com/blang/semver/v4" @@ -48,17 +46,18 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1 return err } - current, updates, condition := calculateAvailableUpdatesStatus(ctx, string(config.Spec.ClusterID), transport, upstream, arch, channel, optr.release.Version) + current, updates, conditionalUpdates, condition := calculateAvailableUpdatesStatus(ctx, string(config.Spec.ClusterID), transport, upstream, arch, channel, optr.release.Version) if usedDefaultUpstream { upstream = "" } optr.setAvailableUpdates(&availableUpdates{ - Upstream: upstream, - Channel: config.Spec.Channel, - Current: current, - Updates: updates, - Condition: condition, + Upstream: upstream, + Channel: config.Spec.Channel, + Current: current, + Updates: updates, + ConditionalUpdates: conditionalUpdates, + Condition: condition, }) // requeue optr.queue.Add(optr.queueKey()) @@ -84,8 +83,10 @@ type availableUpdates struct { // slice was empty. 
LastSyncOrConfigChange time.Time - Current configv1.Release - Updates []configv1.Release + Current configv1.Release + Updates []configv1.Release + ConditionalUpdates []configv1.ConditionalUpdate + Condition configv1.ClusterOperatorStatusCondition } @@ -101,6 +102,7 @@ func (u *availableUpdates) NeedsUpdate(original *configv1.ClusterVersion) *confi return nil } if equality.Semantic.DeepEqual(u.Updates, original.Status.AvailableUpdates) && + equality.Semantic.DeepEqual(u.ConditionalUpdates, original.Status.ConditionalUpdates) && equality.Semantic.DeepEqual(u.Condition, resourcemerge.FindOperatorStatusCondition(original.Status.Conditions, u.Condition.Type)) { return nil } @@ -108,6 +110,7 @@ func (u *availableUpdates) NeedsUpdate(original *configv1.ClusterVersion) *confi config := original.DeepCopy() resourcemerge.SetOperatorStatusCondition(&config.Status.Conditions, u.Condition) config.Status.AvailableUpdates = u.Updates + config.Status.ConditionalUpdates = u.ConditionalUpdates return config } @@ -144,10 +147,10 @@ func (optr *Operator) getAvailableUpdates() *availableUpdates { return optr.availableUpdates } -func calculateAvailableUpdatesStatus(ctx context.Context, clusterID string, transport *http.Transport, upstream, arch, channel, version string) (configv1.Release, []configv1.Release, configv1.ClusterOperatorStatusCondition) { +func calculateAvailableUpdatesStatus(ctx context.Context, clusterID string, transport *http.Transport, upstream, arch, channel, version string) (configv1.Release, []configv1.Release, []configv1.ConditionalUpdate, configv1.ClusterOperatorStatusCondition) { var cvoCurrent configv1.Release if len(upstream) == 0 { - return cvoCurrent, nil, configv1.ClusterOperatorStatusCondition{ + return cvoCurrent, nil, nil, configv1.ClusterOperatorStatusCondition{ Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: "NoUpstream", Message: "No upstream server has been set to retrieve updates.", } @@ -155,7 +158,7 @@ func 
calculateAvailableUpdatesStatus(ctx context.Context, clusterID string, tran upstreamURI, err := url.Parse(upstream) if err != nil { - return cvoCurrent, nil, configv1.ClusterOperatorStatusCondition{ + return cvoCurrent, nil, nil, configv1.ClusterOperatorStatusCondition{ Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: "InvalidURI", Message: fmt.Sprintf("failed to parse upstream URL: %s", err), } @@ -163,28 +166,28 @@ func calculateAvailableUpdatesStatus(ctx context.Context, clusterID string, tran uuid, err := uuid.Parse(string(clusterID)) if err != nil { - return cvoCurrent, nil, configv1.ClusterOperatorStatusCondition{ + return cvoCurrent, nil, nil, configv1.ClusterOperatorStatusCondition{ Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: "InvalidID", Message: fmt.Sprintf("invalid cluster ID: %s", err), } } if len(arch) == 0 { - return cvoCurrent, nil, configv1.ClusterOperatorStatusCondition{ + return cvoCurrent, nil, nil, configv1.ClusterOperatorStatusCondition{ Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: "NoArchitecture", Message: "The set of architectures has not been configured.", } } if len(version) == 0 { - return cvoCurrent, nil, configv1.ClusterOperatorStatusCondition{ + return cvoCurrent, nil, nil, configv1.ClusterOperatorStatusCondition{ Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: "NoCurrentVersion", Message: "The cluster version does not have a semantic version assigned and cannot calculate valid upgrades.", } } if len(channel) == 0 { - return cvoCurrent, nil, configv1.ClusterOperatorStatusCondition{ + return cvoCurrent, nil, nil, configv1.ClusterOperatorStatusCondition{ Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: noChannel, Message: "The update channel has not been configured.", } @@ -193,71 +196,32 @@ func calculateAvailableUpdatesStatus(ctx context.Context, clusterID string, tran currentVersion, err := 
semver.Parse(version) if err != nil { klog.V(2).Infof("Unable to parse current semantic version %q: %v", version, err) - return cvoCurrent, nil, configv1.ClusterOperatorStatusCondition{ + return cvoCurrent, nil, nil, configv1.ClusterOperatorStatusCondition{ Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: "InvalidCurrentVersion", Message: "The current cluster version is not a valid semantic version and cannot be used to calculate upgrades.", } } - current, updates, err := cincinnati.NewClient(uuid, transport).GetUpdates(ctx, upstreamURI, arch, channel, currentVersion) + current, updates, conditionalUpdates, err := cincinnati.NewClient(uuid, transport).GetUpdates(ctx, upstreamURI, arch, channel, currentVersion) if err != nil { klog.V(2).Infof("Upstream server %s could not return available updates: %v", upstream, err) if updateError, ok := err.(*cincinnati.Error); ok { - return cvoCurrent, nil, configv1.ClusterOperatorStatusCondition{ + return cvoCurrent, nil, nil, configv1.ClusterOperatorStatusCondition{ Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: updateError.Reason, Message: fmt.Sprintf("Unable to retrieve available updates: %s", updateError.Message), } } // this should never happen - return cvoCurrent, nil, configv1.ClusterOperatorStatusCondition{ + return cvoCurrent, nil, nil, configv1.ClusterOperatorStatusCondition{ Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: "Unknown", Message: fmt.Sprintf("Unable to retrieve available updates: %s", err), } } - cvoCurrent, err = convertRetrievedUpdateToRelease(current) - if err != nil { - return cvoCurrent, nil, configv1.ClusterOperatorStatusCondition{ - Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: "ResponseInvalid", - Message: fmt.Sprintf("Invalid recommended update node: %s", err), - } - } - - var cvoUpdates []configv1.Release - for _, update := range updates { - cvoUpdate, err := 
convertRetrievedUpdateToRelease(update) - if err != nil { - return cvoCurrent, nil, configv1.ClusterOperatorStatusCondition{ - Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse, Reason: "ResponseInvalid", - Message: fmt.Sprintf("Invalid recommended update node: %s", err), - } - } - cvoUpdates = append(cvoUpdates, cvoUpdate) - } - - return cvoCurrent, cvoUpdates, configv1.ClusterOperatorStatusCondition{ + return current, updates, conditionalUpdates, configv1.ClusterOperatorStatusCondition{ Type: configv1.RetrievedUpdates, Status: configv1.ConditionTrue, LastTransitionTime: metav1.Now(), } } - -func convertRetrievedUpdateToRelease(update cincinnati.Update) (configv1.Release, error) { - cvoUpdate := configv1.Release{ - Version: update.Version.String(), - Image: update.Image, - } - if urlString, ok := update.Metadata["url"]; ok { - _, err := url.Parse(urlString) - if err != nil { - return cvoUpdate, fmt.Errorf("invalid URL for %s: %s", cvoUpdate.Version, err) - } - cvoUpdate.URL = configv1.URL(urlString) - } - if channels, ok := update.Metadata["io.openshift.upgrades.graph.release.channels"]; ok { - cvoUpdate.Channels = strings.Split(channels, ",") - sort.Strings(cvoUpdate.Channels) - } - return cvoUpdate, nil -} From c9dd4792f3efeb3ac7964f1f2211e2b756173304 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 29 Sep 2021 16:00:45 -0700 Subject: [PATCH 3/7] pkg/cvo/availableupdates: Sort (conditional)updates We want them to be sorted in status to avoid excessive churn, regardless of whether the upstream update service perserves consistent ordering. 
--- pkg/cvo/availableupdates.go | 13 +++++++++++++ pkg/cvo/cvo_test.go | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pkg/cvo/availableupdates.go b/pkg/cvo/availableupdates.go index 6b5a543624..9fd13ef427 100644 --- a/pkg/cvo/availableupdates.go +++ b/pkg/cvo/availableupdates.go @@ -6,6 +6,7 @@ import ( "net/http" "net/url" "runtime" + "sort" "time" "github.com/blang/semver/v4" @@ -124,6 +125,18 @@ func (optr *Operator) setAvailableUpdates(u *availableUpdates) { } else { klog.Warningf("Unrecognized condition %s=%s (%s: %s): cannot judge update retrieval success", u.Condition.Type, u.Condition.Status, u.Condition.Reason, u.Condition.Message) } + + sort.Slice(u.Updates, func(i, j int) bool { + vi := semver.MustParse(u.Updates[i].Version) + vj := semver.MustParse(u.Updates[j].Version) + return vi.GTE(vj) + }) + + sort.Slice(u.ConditionalUpdates, func(i, j int) bool { + vi := semver.MustParse(u.ConditionalUpdates[i].Release.Version) + vj := semver.MustParse(u.ConditionalUpdates[j].Release.Version) + return vi.GTE(vj) + }) } optr.statusLock.Lock() diff --git a/pkg/cvo/cvo_test.go b/pkg/cvo/cvo_test.go index 3a9cf367e4..9f7b287f44 100644 --- a/pkg/cvo/cvo_test.go +++ b/pkg/cvo/cvo_test.go @@ -2602,8 +2602,8 @@ func TestOperator_availableUpdatesSync(t *testing.T) { Channel: "fast", Current: configv1.Release{Version: "4.0.1", Image: "image/image:v4.0.1"}, Updates: []configv1.Release{ - {Version: "4.0.2-prerelease", Image: "some.other.registry/image/image:v4.0.2"}, {Version: "4.0.2", Image: "image/image:v4.0.2"}, + {Version: "4.0.2-prerelease", Image: "some.other.registry/image/image:v4.0.2"}, }, Condition: configv1.ClusterOperatorStatusCondition{ Type: configv1.RetrievedUpdates, From aa16795de2b5d48b103a0a7d75498225b45497b0 Mon Sep 17 00:00:00 2001 From: "W. 
Trevor King" Date: Tue, 5 Oct 2021 13:13:54 -0700 Subject: [PATCH 4/7] pkg/cvo/availableupdates: Evaluate conditionalUpdates And manage inclusion/exclusion in availableUpdates appropriately, based on the Recommended condition. --- pkg/cvo/availableupdates.go | 76 ++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/pkg/cvo/availableupdates.go b/pkg/cvo/availableupdates.go index 9fd13ef427..0a2f4d93a1 100644 --- a/pkg/cvo/availableupdates.go +++ b/pkg/cvo/availableupdates.go @@ -7,17 +7,20 @@ import ( "net/url" "runtime" "sort" + "strings" "time" "github.com/blang/semver/v4" "github.com/google/uuid" "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog/v2" configv1 "github.com/openshift/api/config/v1" "github.com/openshift/cluster-version-operator/lib/resourcemerge" "github.com/openshift/cluster-version-operator/pkg/cincinnati" + "github.com/openshift/cluster-version-operator/pkg/clusterconditions" ) const noChannel string = "NoChannel" @@ -52,14 +55,19 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1 if usedDefaultUpstream { upstream = "" } - optr.setAvailableUpdates(&availableUpdates{ + + au := &availableUpdates{ Upstream: upstream, Channel: config.Spec.Channel, Current: current, Updates: updates, ConditionalUpdates: conditionalUpdates, Condition: condition, - }) + } + + au.evaluateConditionalUpdates(ctx) + optr.setAvailableUpdates(au) + // requeue optr.queue.Add(optr.queueKey()) return nil @@ -238,3 +246,67 @@ func calculateAvailableUpdatesStatus(ctx context.Context, clusterID string, tran LastTransitionTime: metav1.Now(), } } + +func (u *availableUpdates) evaluateConditionalUpdates(ctx context.Context) { + if u == nil { + return + } + + for i, conditionalUpdate := range u.ConditionalUpdates { + if errorCondition := evaluateConditionalUpdate(ctx, &conditionalUpdate); errorCondition != nil { + 
meta.SetStatusCondition(&conditionalUpdate.Conditions, *errorCondition) + u.removeUpdate(ctx, conditionalUpdate.Release.Image) + } else { + meta.SetStatusCondition(&conditionalUpdate.Conditions, metav1.Condition{ + Type: "Recommended", + Status: metav1.ConditionTrue, + // FIXME: ObservedGeneration? That would capture upstream/channel, but not necessarily the currently-reconciling version. + Reason: "AsExpected", + Message: "The update is recommended, because none of the conditional update risks apply to this cluster.", + }) + u.Updates = append(u.Updates, conditionalUpdate.Release) + } + u.ConditionalUpdates[i].Conditions = conditionalUpdate.Conditions + } +} + +func (u *availableUpdates) removeUpdate(ctx context.Context, image string) { + for i, update := range u.Updates { + if update.Image == image { + u.Updates = append(u.Updates[:i], u.Updates[i+1:]...) + } + } +} + +func evaluateConditionalUpdate(ctx context.Context, conditionalUpdate *configv1.ConditionalUpdate) *metav1.Condition { + recommended := &metav1.Condition{ + Type: "Recommended", + } + messages := []string{} + for _, risk := range conditionalUpdate.Risks { + if match, err := clusterconditions.Match(ctx, risk.MatchingRules); err != nil { + if recommended.Status != metav1.ConditionFalse { + recommended.Status = metav1.ConditionUnknown + } + if recommended.Reason == "" || recommended.Reason == "EvaluationFailed" { + recommended.Reason = "EvaluationFailed" + } else { + recommended.Reason = "MultipleReasons" + } + messages = append(messages, fmt.Sprintf("Exposure to %s is unknown due to an evaluation failure: %v\n%s %s", risk.Name, err, risk.Message, risk.URL)) + } else if match { + recommended.Status = metav1.ConditionFalse + if recommended.Reason == "" { + recommended.Reason = risk.Name + } else { + recommended.Reason = "MultipleReasons" + } + messages = append(messages, fmt.Sprintf("%s %s", risk.Message, risk.URL)) + } + } + if recommended.Status == "" { + return nil + } + recommended.Message = 
strings.Join(messages, "\n\n") + return recommended +} From fa6ff3709823e907f0eeb6c9f135ee994c184f55 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Thu, 4 Nov 2021 23:24:09 -0700 Subject: [PATCH 5/7] pkg/clusterconditions/promql: Fill in the implementation The Thanos Service requests a TLS certificate, as documented in [1]: $ curl -s https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/periodic-ci-openshift-release-master-ci-4.10-e2e-gcp/1457166426941952000/artifacts/e2e-gcp/gather-extra/artifacts/services.json | jq -r '.items[].metadata | select(.name == "thanos-querier").annotations' { "service.alpha.openshift.io/serving-cert-signed-by": "openshift-service-serving-signer@1636251390", "service.beta.openshift.io/serving-cert-secret-name": "thanos-querier-tls", "service.beta.openshift.io/serving-cert-signed-by": "openshift-service-serving-signer@1636251390" } The service-ca controller [2] provides a Secret with the requested cert and key: $ curl -s https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/logs/periodic-ci-openshift-release-master-ci-4.10-e2e-gcp/1457166426941952000/artifacts/e2e-gcp/gather-must-gather/artifacts/must-gather.tar | tar xOz registry-ci-openshift-org-ocp-4-10-2021-11-07-014119-sha256-bec152bc664c5ba6192357aabfcdd18810135c8e89800239ffee86b6a5d8730d/namespaces/openshift-monitoring/core/secrets.yaml | yaml2json | jq -r '.items[] | select(.metadata.name == "thanos-querier-tls").data["tls.crt"] | @base64d' | openssl x509 -text -noout | grep 'Issuer\|Subject:\|DNS' Issuer: CN = openshift-service-serving-signer@1636251390 Subject: CN = thanos-querier.openshift-monitoring.svc DNS:thanos-querier.openshift-monitoring.svc, DNS:thanos-querier.openshift-monitoring.svc.cluster.local From [3]: Other pods can trust cluster-created certificates (which are only signed for internal DNS names), by using the CA bundle in the /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt file that is automatically 
mounted in their pod. However, checking a CVO term in a 4.10 cluster: sh-4.4# ls /var/run/secrets/kubernetes.io/serviceaccount ca.crt namespace token So instead I'm mounting the openshift-service-ca.crt ConfigMap, which has the service.beta.openshift.io/inject-cabundle annotation. I'm not entirely sure what the flow is for creating this ConfigMap. It's metadata.managedFields[].manager is kube-controller-manager; maybe similar ConfigMaps are created in all namespaces? [1]: https://docs.openshift.com/container-platform/4.9/security/certificate_types_descriptions/service-ca-certificates.html [2]: https://docs.openshift.com/container-platform/4.9/security/certificates/service-serving-certificate.html [3]: https://docs.openshift.com/container-platform/4.9/nodes/pods/nodes-pods-secrets.html#nodes-pods-secrets-certificates-about_nodes-pods-secrets --- ...luster-version-operator_03_deployment.yaml | 6 ++ pkg/clusterconditions/promql/promql.go | 76 ++++++++++++++++++- 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/install/0000_00_cluster-version-operator_03_deployment.yaml b/install/0000_00_cluster-version-operator_03_deployment.yaml index df09a55589..6e7233c80d 100644 --- a/install/0000_00_cluster-version-operator_03_deployment.yaml +++ b/install/0000_00_cluster-version-operator_03_deployment.yaml @@ -49,6 +49,9 @@ spec: - mountPath: /etc/tls/serving-cert name: serving-cert readOnly: true + - mountPath: /etc/tls/service-ca + name: service-ca + readOnly: true - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access readOnly: true @@ -97,6 +100,9 @@ spec: - name: serving-cert secret: secretName: cluster-version-operator-serving-cert + - name: service-ca + configMap: + name: openshift-service-ca.crt - name: kube-api-access projected: defaultMode: 420 diff --git a/pkg/clusterconditions/promql/promql.go b/pkg/clusterconditions/promql/promql.go index 3600a42251..4263cedc55 100644 --- a/pkg/clusterconditions/promql/promql.go +++ 
b/pkg/clusterconditions/promql/promql.go @@ -1,4 +1,4 @@ -// Package promql +// Package promql implements a cluster condition based on PromQL queries. // // https://github.com/openshift/enhancements/blob/master/enhancements/update/targeted-update-edge-blocking.md#promql package promql @@ -6,15 +6,40 @@ package promql import ( "context" "errors" + "fmt" + "time" configv1 "github.com/openshift/api/config/v1" + "github.com/prometheus/client_golang/api" + prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/common/config" + "github.com/prometheus/common/model" + "k8s.io/klog/v2" + "github.com/openshift/cluster-version-operator/pkg/clusterconditions" ) // PromQL implements a cluster condition that matches based on PromQL. -type PromQL struct{} +type PromQL struct { + // Address holds the Prometheus query URI. + Address string -var promql = &PromQL{} + // HTTPClientConfig holds the client configuration for connecting to the Prometheus service. + HTTPClientConfig config.HTTPClientConfig +} + +var promql = &PromQL{ + Address: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091", + HTTPClientConfig: config.HTTPClientConfig{ + Authorization: &config.Authorization{ + Type: "Bearer", + CredentialsFile: "/var/run/secrets/kubernetes.io/serviceaccount/token", + }, + TLSConfig: config.TLSConfig{ + CAFile: "/etc/tls/service-ca/service-ca.crt", + }, + }, +} // Valid returns an error if the condition contains any properties // besides 'type' and a valid `promql`. @@ -34,7 +59,50 @@ func (p *PromQL) Valid(ctx context.Context, condition *configv1.ClusterCondition // false when the PromQL evaluates to 0, and an error if the PromQL // returns no time series or returns a value besides 0 or 1. 
func (p *PromQL) Match(ctx context.Context, condition *configv1.ClusterCondition) (bool, error) { - return false, errors.New("not yet implemented: PromQL matching") + clientConfig := api.Config{Address: p.Address} + + if roundTripper, err := config.NewRoundTripperFromConfig(p.HTTPClientConfig, "cluster-conditions"); err == nil { + clientConfig.RoundTripper = roundTripper + } else { + return false, fmt.Errorf("creating PromQL round-tripper: %w", err) + } + + client, err := api.NewClient(clientConfig) + if err != nil { + return false, fmt.Errorf("creating PromQL client: %w", err) + } + + v1api := prometheusv1.NewAPI(client) + klog.V(4).Infof("evaluate %s cluster condition: %q", condition.Type, condition.PromQL.PromQL) + result, warnings, err := v1api.Query(ctx, condition.PromQL.PromQL, time.Now()) + if err != nil { + return false, fmt.Errorf("executing PromQL query: %w", err) + } + + for _, warning := range warnings { + klog.Warning(warning) + } + + if result.Type() != model.ValVector { + return false, fmt.Errorf("invalid PromQL result type is %s, not vector", result.Type()) + } + + vector, ok := result.(model.Vector) + if !ok { + return false, fmt.Errorf("invalid PromQL result type is nominally %s, but fails Vector cast", result.Type()) + } + + if vector.Len() != 1 { + return false, fmt.Errorf("invalid PromQL result length must be one, but is %d", vector.Len()) + } + + sample := vector[0] + if sample.Value == 0 { + return false, nil + } else if sample.Value == 1 { + return true, nil + } + return false, fmt.Errorf("invalid PromQL result (must be 0 or 1): %v", sample.Value) } func init() { From e3a21053dee1c05c65535e63ca64dec5c87ce67d Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Thu, 4 Nov 2021 23:53:13 -0700 Subject: [PATCH 6/7] vendor: Include the Prometheus client Now that the PromQL cluster condition consumes it. 
Generated with: $ go mod vendor $ go mod tidy $ git add -A go.* vendor using: $ go version go version go1.16.6 linux/arm64 --- go.mod | 1 + go.sum | 2 + vendor/github.com/jpillora/backoff/LICENSE | 21 + vendor/github.com/jpillora/backoff/README.md | 119 ++ vendor/github.com/jpillora/backoff/backoff.go | 100 ++ vendor/github.com/jpillora/backoff/go.mod | 3 + .../mwitkow/go-conntrack/.gitignore | 163 +++ .../mwitkow/go-conntrack/.travis.yml | 17 + .../github.com/mwitkow/go-conntrack/LICENSE | 201 +++ .../github.com/mwitkow/go-conntrack/README.md | 88 ++ .../mwitkow/go-conntrack/dialer_reporter.go | 108 ++ .../mwitkow/go-conntrack/dialer_wrapper.go | 166 +++ .../mwitkow/go-conntrack/listener_reporter.go | 43 + .../mwitkow/go-conntrack/listener_wrapper.go | 158 +++ .../prometheus/client_golang/api/client.go | 129 ++ .../client_golang/api/prometheus/v1/api.go | 1126 ++++++++++++++++ .../prometheus/common/config/config.go | 66 + .../prometheus/common/config/http_config.go | 804 ++++++++++++ .../x/net/internal/timeseries/timeseries.go | 525 ++++++++ vendor/golang.org/x/net/trace/events.go | 532 ++++++++ vendor/golang.org/x/net/trace/histogram.go | 365 ++++++ vendor/golang.org/x/net/trace/trace.go | 1130 +++++++++++++++++ .../clientcredentials/clientcredentials.go | 120 ++ vendor/modules.txt | 11 + 24 files changed, 5998 insertions(+) create mode 100644 vendor/github.com/jpillora/backoff/LICENSE create mode 100644 vendor/github.com/jpillora/backoff/README.md create mode 100644 vendor/github.com/jpillora/backoff/backoff.go create mode 100644 vendor/github.com/jpillora/backoff/go.mod create mode 100644 vendor/github.com/mwitkow/go-conntrack/.gitignore create mode 100644 vendor/github.com/mwitkow/go-conntrack/.travis.yml create mode 100644 vendor/github.com/mwitkow/go-conntrack/LICENSE create mode 100644 vendor/github.com/mwitkow/go-conntrack/README.md create mode 100644 vendor/github.com/mwitkow/go-conntrack/dialer_reporter.go create mode 100644 
vendor/github.com/mwitkow/go-conntrack/dialer_wrapper.go create mode 100644 vendor/github.com/mwitkow/go-conntrack/listener_reporter.go create mode 100644 vendor/github.com/mwitkow/go-conntrack/listener_wrapper.go create mode 100644 vendor/github.com/prometheus/client_golang/api/client.go create mode 100644 vendor/github.com/prometheus/client_golang/api/prometheus/v1/api.go create mode 100644 vendor/github.com/prometheus/common/config/config.go create mode 100644 vendor/github.com/prometheus/common/config/http_config.go create mode 100644 vendor/golang.org/x/net/internal/timeseries/timeseries.go create mode 100644 vendor/golang.org/x/net/trace/events.go create mode 100644 vendor/golang.org/x/net/trace/histogram.go create mode 100644 vendor/golang.org/x/net/trace/trace.go create mode 100644 vendor/golang.org/x/oauth2/clientcredentials/clientcredentials.go diff --git a/go.mod b/go.mod index cba91c260d..3c369820be 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.11.0 github.com/prometheus/client_model v0.2.0 + github.com/prometheus/common v0.26.0 github.com/spf13/cobra v1.1.3 golang.org/x/net v0.0.0-20210520170846-37e1c6afe023 golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac diff --git a/go.sum b/go.sum index 1d085d33f9..fe094fedd1 100644 --- a/go.sum +++ b/go.sum @@ -357,6 +357,7 @@ github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANyt github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3/go.mod 
h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= @@ -425,6 +426,7 @@ github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7P github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= diff --git a/vendor/github.com/jpillora/backoff/LICENSE b/vendor/github.com/jpillora/backoff/LICENSE new file mode 100644 index 0000000000..1cc708081b --- /dev/null +++ b/vendor/github.com/jpillora/backoff/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2017 Jaime Pillora + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/jpillora/backoff/README.md b/vendor/github.com/jpillora/backoff/README.md new file mode 100644 index 0000000000..ee4d6230af --- /dev/null +++ b/vendor/github.com/jpillora/backoff/README.md @@ -0,0 +1,119 @@ +# Backoff + +A simple exponential backoff counter in Go (Golang) + +[![GoDoc](https://godoc.org/github.com/jpillora/backoff?status.svg)](https://godoc.org/github.com/jpillora/backoff) [![Circle CI](https://circleci.com/gh/jpillora/backoff.svg?style=shield)](https://circleci.com/gh/jpillora/backoff) + +### Install + +``` +$ go get -v github.com/jpillora/backoff +``` + +### Usage + +Backoff is a `time.Duration` counter. It starts at `Min`. After every call to `Duration()` it is multiplied by `Factor`. It is capped at `Max`. It returns to `Min` on every call to `Reset()`. `Jitter` adds randomness ([see below](#example-using-jitter)). Used in conjunction with the `time` package. + +--- + +#### Simple example + +``` go + +b := &backoff.Backoff{ + //These are the defaults + Min: 100 * time.Millisecond, + Max: 10 * time.Second, + Factor: 2, + Jitter: false, +} + +fmt.Printf("%s\n", b.Duration()) +fmt.Printf("%s\n", b.Duration()) +fmt.Printf("%s\n", b.Duration()) + +fmt.Printf("Reset!\n") +b.Reset() + +fmt.Printf("%s\n", b.Duration()) +``` + +``` +100ms +200ms +400ms +Reset! 
+100ms +``` + +--- + +#### Example using `net` package + +``` go +b := &backoff.Backoff{ + Max: 5 * time.Minute, +} + +for { + conn, err := net.Dial("tcp", "example.com:5309") + if err != nil { + d := b.Duration() + fmt.Printf("%s, reconnecting in %s", err, d) + time.Sleep(d) + continue + } + //connected + b.Reset() + conn.Write([]byte("hello world!")) + // ... Read ... Write ... etc + conn.Close() + //disconnected +} + +``` + +--- + +#### Example using `Jitter` + +Enabling `Jitter` adds some randomization to the backoff durations. [See Amazon's writeup of performance gains using jitter](http://www.awsarchitectureblog.com/2015/03/backoff.html). Seeding is not necessary but doing so gives repeatable results. + +```go +import "math/rand" + +b := &backoff.Backoff{ + Jitter: true, +} + +rand.Seed(42) + +fmt.Printf("%s\n", b.Duration()) +fmt.Printf("%s\n", b.Duration()) +fmt.Printf("%s\n", b.Duration()) + +fmt.Printf("Reset!\n") +b.Reset() + +fmt.Printf("%s\n", b.Duration()) +fmt.Printf("%s\n", b.Duration()) +fmt.Printf("%s\n", b.Duration()) +``` + +``` +100ms +106.600049ms +281.228155ms +Reset! +100ms +104.381845ms +214.957989ms +``` + +#### Documentation + +https://godoc.org/github.com/jpillora/backoff + +#### Credits + +Forked from [some JavaScript](https://github.com/segmentio/backo) written by [@tj](https://github.com/tj) diff --git a/vendor/github.com/jpillora/backoff/backoff.go b/vendor/github.com/jpillora/backoff/backoff.go new file mode 100644 index 0000000000..d113e68906 --- /dev/null +++ b/vendor/github.com/jpillora/backoff/backoff.go @@ -0,0 +1,100 @@ +// Package backoff provides an exponential-backoff implementation. +package backoff + +import ( + "math" + "math/rand" + "sync/atomic" + "time" +) + +// Backoff is a time.Duration counter, starting at Min. After every call to +// the Duration method the current timing is multiplied by Factor, but it +// never exceeds Max. 
+// +// Backoff is not generally concurrent-safe, but the ForAttempt method can +// be used concurrently. +type Backoff struct { + attempt uint64 + // Factor is the multiplying factor for each increment step + Factor float64 + // Jitter eases contention by randomizing backoff steps + Jitter bool + // Min and Max are the minimum and maximum values of the counter + Min, Max time.Duration +} + +// Duration returns the duration for the current attempt before incrementing +// the attempt counter. See ForAttempt. +func (b *Backoff) Duration() time.Duration { + d := b.ForAttempt(float64(atomic.AddUint64(&b.attempt, 1) - 1)) + return d +} + +const maxInt64 = float64(math.MaxInt64 - 512) + +// ForAttempt returns the duration for a specific attempt. This is useful if +// you have a large number of independent Backoffs, but don't want use +// unnecessary memory storing the Backoff parameters per Backoff. The first +// attempt should be 0. +// +// ForAttempt is concurrent-safe. +func (b *Backoff) ForAttempt(attempt float64) time.Duration { + // Zero-values are nonsensical, so we use + // them to apply defaults + min := b.Min + if min <= 0 { + min = 100 * time.Millisecond + } + max := b.Max + if max <= 0 { + max = 10 * time.Second + } + if min >= max { + // short-circuit + return max + } + factor := b.Factor + if factor <= 0 { + factor = 2 + } + //calculate this duration + minf := float64(min) + durf := minf * math.Pow(factor, attempt) + if b.Jitter { + durf = rand.Float64()*(durf-minf) + minf + } + //ensure float64 wont overflow int64 + if durf > maxInt64 { + return max + } + dur := time.Duration(durf) + //keep within bounds + if dur < min { + return min + } + if dur > max { + return max + } + return dur +} + +// Reset restarts the current attempt counter at zero. +func (b *Backoff) Reset() { + atomic.StoreUint64(&b.attempt, 0) +} + +// Attempt returns the current attempt counter value. 
+func (b *Backoff) Attempt() float64 { + return float64(atomic.LoadUint64(&b.attempt)) +} + +// Copy returns a backoff with equals constraints as the original +func (b *Backoff) Copy() *Backoff { + return &Backoff{ + Factor: b.Factor, + Jitter: b.Jitter, + Min: b.Min, + Max: b.Max, + } +} diff --git a/vendor/github.com/jpillora/backoff/go.mod b/vendor/github.com/jpillora/backoff/go.mod new file mode 100644 index 0000000000..7c41bc6f58 --- /dev/null +++ b/vendor/github.com/jpillora/backoff/go.mod @@ -0,0 +1,3 @@ +module github.com/jpillora/backoff + +go 1.13 diff --git a/vendor/github.com/mwitkow/go-conntrack/.gitignore b/vendor/github.com/mwitkow/go-conntrack/.gitignore new file mode 100644 index 0000000000..406e49369a --- /dev/null +++ b/vendor/github.com/mwitkow/go-conntrack/.gitignore @@ -0,0 +1,163 @@ +# Created by .ignore support plugin (hsz.mobi) +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff: +.idea +.idea/workspace.xml +.idea/tasks.xml +.idea/dictionaries +.idea/vcs.xml +.idea/jsLibraryMappings.xml + +# Sensitive or high-churn files: +.idea/dataSources.ids +.idea/dataSources.xml +.idea/dataSources.local.xml +.idea/sqlDataSources.xml +.idea/dynamic.xml +.idea/uiDesigner.xml + +# Gradle: +.idea/gradle.xml +.idea/libraries + +# Mongo Explorer plugin: +.idea/mongoSettings.xml + +## File-based project format: +*.iws + +## Plugin-specific files: + +# IntelliJ +/out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties +### Go template +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific 
extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject + diff --git a/vendor/github.com/mwitkow/go-conntrack/.travis.yml b/vendor/github.com/mwitkow/go-conntrack/.travis.yml new file mode 100644 index 0000000000..a9654fa05a --- /dev/null +++ b/vendor/github.com/mwitkow/go-conntrack/.travis.yml @@ -0,0 +1,17 @@ +sudo: false +language: go +go: +- "1.8" +- "1.9" +- "1.10" +- "1.11" +- "1.12" + +install: +- go get github.com/stretchr/testify +- go get github.com/prometheus/client_golang/prometheus +- go get golang.org/x/net/context +- go get golang.org/x/net/trace + +script: +- go test -v ./... 
diff --git a/vendor/github.com/mwitkow/go-conntrack/LICENSE b/vendor/github.com/mwitkow/go-conntrack/LICENSE new file mode 100644 index 0000000000..8dada3edaf --- /dev/null +++ b/vendor/github.com/mwitkow/go-conntrack/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/vendor/github.com/mwitkow/go-conntrack/README.md b/vendor/github.com/mwitkow/go-conntrack/README.md new file mode 100644 index 0000000000..5ae7702844 --- /dev/null +++ b/vendor/github.com/mwitkow/go-conntrack/README.md @@ -0,0 +1,88 @@ +# Go tracing and monitoring (Prometheus) for `net.Conn` + +[![Travis Build](https://travis-ci.org/mwitkow/go-conntrack.svg)](https://travis-ci.org/mwitkow/go-conntrack) +[![Go Report Card](https://goreportcard.com/badge/github.com/mwitkow/go-conntrack)](http://goreportcard.com/report/mwitkow/go-conntrack) +[![GoDoc](http://img.shields.io/badge/GoDoc-Reference-blue.svg)](https://godoc.org/github.com/mwitkow/go-conntrack) +[![Apache 2.0 License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) + +[Prometheus](https://prometheus.io/) monitoring and [`x/net/trace`](https://godoc.org/golang.org/x/net/trace#EventLog) tracing wrappers `net.Conn`, both inbound (`net.Listener`) and outbound (`net.Dialer`). + +## Why? + +Go standard library does a great job of doing "the right" things with your connections: `http.Transport` pools outbound ones, and `http.Server` sets good *Keep Alive* defaults. +However, it is still easy to get it wrong, see the excellent [*The complete guide to Go net/http timeouts*](https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/). + +That's why you should be able to monitor (using Prometheus) how many connections your Go frontend servers have inbound, and how big are the connection pools to your backends. You should also be able to inspect your connection without `ssh` and `netstat`. + +![Events page with connections](https://raw.githubusercontent.com/mwitkow/go-conntrack/images/events.png) + +## How to use? + +All of these examples can be found in [`example/server.go`](example/server.go): + +### Conntrack Dialer for HTTP DefaultClient + +Most often people use the default `http.DefaultClient` that uses `http.DefaultTransport`. 
The easiest way to make sure all your outbound connections monitored and trace is: + +```go +http.DefaultTransport.(*http.Transport).DialContext = conntrack.NewDialContextFunc( + conntrack.DialWithTracing(), + conntrack.DialWithDialer(&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }), +) +``` + +#### Dialer Name + +Tracked outbound connections are organised by *dialer name* (with `default` being default). The *dialer name* is used for monitoring (`dialer_name` label) and tracing (`net.ClientConn.` family). + +You can pass `conntrack.WithDialerName()` to `NewDialContextFunc` to set the name for the dialer. Moreover, you can set the *dialer name* per invocation of the dialer, by passing it in the `Context`. For example using the [`ctxhttp`](https://godoc.org/golang.org/x/net/context/ctxhttp) lib: + +```go +callCtx := conntrack.DialNameToContext(parentCtx, "google") +ctxhttp.Get(callCtx, http.DefaultClient, "https://www.google.com") +``` + +### Conntrack Listener for HTTP Server + +Tracked inbound connections are organised by *listener name* (with `default` being default). The *listener name* is used for monitoring (`listener_name` label) and tracing (`net.ServerConn.` family). For example, a simple `http.Server` can be instrumented like this: + +```go +listener, err := net.Listen("tcp", fmt.Sprintf(":%d", *port)) +listener = conntrack.NewListener(listener, + conntrack.TrackWithName("http"), + conntrack.TrackWithTracing(), + conntrack.TrackWithTcpKeepAlive(5 * time.Minutes)) +httpServer.Serve(listener) +``` + +Note, the `TrackWithTcpKeepAlive`. The default `http.ListenAndServe` adds a tcp keep alive wrapper to inbound TCP connections. `conntrack.NewListener` allows you to do that without another layer of wrapping. + +#### TLS server example + +The standard lobrary `http.ListenAndServerTLS` does a lot to bootstrap TLS connections, including supporting HTTP2 negotiation. 
Unfortunately, that is hard to do if you want to provide your own `net.Listener`. That's why this repo comes with `connhelpers` package, which takes care of configuring `tls.Config` for that use case. Here's an example of use: + +```go +listener, err := net.Listen("tcp", fmt.Sprintf(":%d", *port)) +listener = conntrack.NewListener(listener, + conntrack.TrackWithName("https"), + conntrack.TrackWithTracing(), + conntrack.TrackWithTcpKeepAlive(5 * time.Minutes)) +tlsConfig, err := connhelpers.TlsConfigForServerCerts(*tlsCertFilePath, *tlsKeyFilePath) +tlsConfig, err = connhelpers.TlsConfigWithHttp2Enabled(tlsConfig) +tlsListener := tls.NewListener(listener, tlsConfig) +httpServer.Serve(listener) +``` + +# Status + +This code is used by Improbable's HTTP frontending and proxying stack for debuging and monitoring of established user connections. + +Additional tooling will be added if needed, and contributions are welcome. + +#License + +`go-conntrack` is released under the Apache 2.0 license. See the [LICENSE](LICENSE) file for details. + diff --git a/vendor/github.com/mwitkow/go-conntrack/dialer_reporter.go b/vendor/github.com/mwitkow/go-conntrack/dialer_reporter.go new file mode 100644 index 0000000000..0e39886b57 --- /dev/null +++ b/vendor/github.com/mwitkow/go-conntrack/dialer_reporter.go @@ -0,0 +1,108 @@ +// Copyright 2016 Michal Witkowski. All Rights Reserved. +// See LICENSE for licensing terms. 
+ +package conntrack + +import ( + "context" + "net" + "os" + "syscall" + + prom "github.com/prometheus/client_golang/prometheus" +) + +type failureReason string + +const ( + failedResolution = "resolution" + failedConnRefused = "refused" + failedTimeout = "timeout" + failedUnknown = "unknown" +) + +var ( + dialerAttemptedTotal = prom.NewCounterVec( + prom.CounterOpts{ + Namespace: "net", + Subsystem: "conntrack", + Name: "dialer_conn_attempted_total", + Help: "Total number of connections attempted by the given dialer a given name.", + }, []string{"dialer_name"}) + + dialerConnEstablishedTotal = prom.NewCounterVec( + prom.CounterOpts{ + Namespace: "net", + Subsystem: "conntrack", + Name: "dialer_conn_established_total", + Help: "Total number of connections successfully established by the given dialer a given name.", + }, []string{"dialer_name"}) + + dialerConnFailedTotal = prom.NewCounterVec( + prom.CounterOpts{ + Namespace: "net", + Subsystem: "conntrack", + Name: "dialer_conn_failed_total", + Help: "Total number of connections failed to dial by the dialer a given name.", + }, []string{"dialer_name", "reason"}) + + dialerConnClosedTotal = prom.NewCounterVec( + prom.CounterOpts{ + Namespace: "net", + Subsystem: "conntrack", + Name: "dialer_conn_closed_total", + Help: "Total number of connections closed which originated from the dialer of a given name.", + }, []string{"dialer_name"}) +) + +func init() { + prom.MustRegister(dialerAttemptedTotal) + prom.MustRegister(dialerConnEstablishedTotal) + prom.MustRegister(dialerConnFailedTotal) + prom.MustRegister(dialerConnClosedTotal) +} + +// preRegisterDialerMetrics pre-populates Prometheus labels for the given dialer name, to avoid Prometheus missing labels issue. 
+func PreRegisterDialerMetrics(dialerName string) {
+	dialerAttemptedTotal.WithLabelValues(dialerName)
+	dialerConnEstablishedTotal.WithLabelValues(dialerName)
+	for _, reason := range []failureReason{failedTimeout, failedResolution, failedConnRefused, failedUnknown} {
+		dialerConnFailedTotal.WithLabelValues(dialerName, string(reason))
+	}
+	dialerConnClosedTotal.WithLabelValues(dialerName)
+}
+
+// reportDialerConnAttempt counts one dial attempt for the named dialer.
+func reportDialerConnAttempt(dialerName string) {
+	dialerAttemptedTotal.WithLabelValues(dialerName).Inc()
+}
+
+// reportDialerConnEstablished counts one successfully established connection for the named dialer.
+func reportDialerConnEstablished(dialerName string) {
+	dialerConnEstablishedTotal.WithLabelValues(dialerName).Inc()
+}
+
+// reportDialerConnClosed counts one closed connection that originated from the named dialer.
+func reportDialerConnClosed(dialerName string) {
+	dialerConnClosedTotal.WithLabelValues(dialerName).Inc()
+}
+
+// reportDialerConnFailed classifies err and increments exactly one failure
+// counter (resolution, refused, timeout, or unknown) for the named dialer.
+func reportDialerConnFailed(dialerName string, err error) {
+	if netErr, ok := err.(*net.OpError); ok {
+		switch nestErr := netErr.Err.(type) {
+		case *net.DNSError:
+			dialerConnFailedTotal.WithLabelValues(dialerName, string(failedResolution)).Inc()
+			return
+		case *os.SyscallError:
+			if nestErr.Err == syscall.ECONNREFUSED {
+				dialerConnFailedTotal.WithLabelValues(dialerName, string(failedConnRefused)).Inc()
+				// NOTE(review): fixed vs upstream — previously fell through and also counted "unknown".
+				return
+			}
+			dialerConnFailedTotal.WithLabelValues(dialerName, string(failedUnknown)).Inc()
+			return
+		}
+		if netErr.Timeout() {
+			dialerConnFailedTotal.WithLabelValues(dialerName, string(failedTimeout)).Inc()
+			// NOTE(review): fixed vs upstream — previously fell through and also counted "unknown".
+			return
+		}
+	} else if err == context.Canceled || err == context.DeadlineExceeded {
+		dialerConnFailedTotal.WithLabelValues(dialerName, string(failedTimeout)).Inc()
+		return
+	}
+	dialerConnFailedTotal.WithLabelValues(dialerName, string(failedUnknown)).Inc()
+}
diff --git a/vendor/github.com/mwitkow/go-conntrack/dialer_wrapper.go b/vendor/github.com/mwitkow/go-conntrack/dialer_wrapper.go
new file mode 100644
index 0000000000..cebaf96766
--- /dev/null
+++ b/vendor/github.com/mwitkow/go-conntrack/dialer_wrapper.go
@@ -0,0 +1,166 @@
+// Copyright 2016 Michal Witkowski. All Rights Reserved.
+// See LICENSE for licensing terms. 
+ +package conntrack + +import ( + "context" + "fmt" + "net" + "sync" + + "golang.org/x/net/trace" +) + +var ( + dialerNameKey = "conntrackDialerKey" +) + +type dialerOpts struct { + name string + monitoring bool + tracing bool + parentDialContextFunc dialerContextFunc +} + +type dialerOpt func(*dialerOpts) + +type dialerContextFunc func(context.Context, string, string) (net.Conn, error) + +// DialWithName sets the name of the dialer for tracking and monitoring. +// This is the name for the dialer (default is `default`), but for `NewDialContextFunc` can be overwritten from the +// Context using `DialNameToContext`. +func DialWithName(name string) dialerOpt { + return func(opts *dialerOpts) { + opts.name = name + } +} + +// DialWithoutMonitoring turns *off* Prometheus monitoring for this dialer. +func DialWithoutMonitoring() dialerOpt { + return func(opts *dialerOpts) { + opts.monitoring = false + } +} + +// DialWithTracing turns *on* the /debug/events tracing of the dial calls. +func DialWithTracing() dialerOpt { + return func(opts *dialerOpts) { + opts.tracing = true + } +} + +// DialWithDialer allows you to override the `net.Dialer` instance used to actually conduct the dials. +func DialWithDialer(parentDialer *net.Dialer) dialerOpt { + return DialWithDialContextFunc(parentDialer.DialContext) +} + +// DialWithDialContextFunc allows you to override func gets used for the actual dialing. The default is `net.Dialer.DialContext`. +func DialWithDialContextFunc(parentDialerFunc dialerContextFunc) dialerOpt { + return func(opts *dialerOpts) { + opts.parentDialContextFunc = parentDialerFunc + } +} + +// DialNameFromContext returns the name of the dialer from the context of the DialContext func, if any. +func DialNameFromContext(ctx context.Context) string { + val, ok := ctx.Value(dialerNameKey).(string) + if !ok { + return "" + } + return val +} + +// DialNameToContext returns a context that will contain a dialer name override. 
+func DialNameToContext(ctx context.Context, dialerName string) context.Context { + return context.WithValue(ctx, dialerNameKey, dialerName) +} + +// NewDialContextFunc returns a `DialContext` function that tracks outbound connections. +// The signature is compatible with `http.Tranport.DialContext` and is meant to be used there. +func NewDialContextFunc(optFuncs ...dialerOpt) func(context.Context, string, string) (net.Conn, error) { + opts := &dialerOpts{name: defaultName, monitoring: true, parentDialContextFunc: (&net.Dialer{}).DialContext} + for _, f := range optFuncs { + f(opts) + } + if opts.monitoring { + PreRegisterDialerMetrics(opts.name) + } + return func(ctx context.Context, network string, addr string) (net.Conn, error) { + name := opts.name + if ctxName := DialNameFromContext(ctx); ctxName != "" { + name = ctxName + } + return dialClientConnTracker(ctx, network, addr, name, opts) + } +} + +// NewDialFunc returns a `Dial` function that tracks outbound connections. +// The signature is compatible with `http.Tranport.Dial` and is meant to be used there for Go < 1.7. +func NewDialFunc(optFuncs ...dialerOpt) func(string, string) (net.Conn, error) { + dialContextFunc := NewDialContextFunc(optFuncs...) 
+ return func(network string, addr string) (net.Conn, error) { + return dialContextFunc(context.TODO(), network, addr) + } +} + +type clientConnTracker struct { + net.Conn + opts *dialerOpts + dialerName string + event trace.EventLog + mu sync.Mutex +} + +func dialClientConnTracker(ctx context.Context, network string, addr string, dialerName string, opts *dialerOpts) (net.Conn, error) { + var event trace.EventLog + if opts.tracing { + event = trace.NewEventLog(fmt.Sprintf("net.ClientConn.%s", dialerName), fmt.Sprintf("%v", addr)) + } + if opts.monitoring { + reportDialerConnAttempt(dialerName) + } + conn, err := opts.parentDialContextFunc(ctx, network, addr) + if err != nil { + if event != nil { + event.Errorf("failed dialing: %v", err) + event.Finish() + } + if opts.monitoring { + reportDialerConnFailed(dialerName, err) + } + return nil, err + } + if event != nil { + event.Printf("established: %s -> %s", conn.LocalAddr(), conn.RemoteAddr()) + } + if opts.monitoring { + reportDialerConnEstablished(dialerName) + } + tracker := &clientConnTracker{ + Conn: conn, + opts: opts, + dialerName: dialerName, + event: event, + } + return tracker, nil +} + +func (ct *clientConnTracker) Close() error { + err := ct.Conn.Close() + ct.mu.Lock() + if ct.event != nil { + if err != nil { + ct.event.Errorf("failed closing: %v", err) + } else { + ct.event.Printf("closing") + } + ct.event.Finish() + ct.event = nil + } + ct.mu.Unlock() + if ct.opts.monitoring { + reportDialerConnClosed(ct.dialerName) + } + return err +} diff --git a/vendor/github.com/mwitkow/go-conntrack/listener_reporter.go b/vendor/github.com/mwitkow/go-conntrack/listener_reporter.go new file mode 100644 index 0000000000..21a8f5557c --- /dev/null +++ b/vendor/github.com/mwitkow/go-conntrack/listener_reporter.go @@ -0,0 +1,43 @@ +// Copyright 2016 Michal Witkowski. All Rights Reserved. +// See LICENSE for licensing terms. 
+
+package conntrack
+
+import prom "github.com/prometheus/client_golang/prometheus"
+
+var (
+	listenerAcceptedTotal = prom.NewCounterVec(
+		prom.CounterOpts{
+			Namespace: "net",
+			Subsystem: "conntrack",
+			Name: "listener_conn_accepted_total",
+			Help: "Total number of connections opened to the listener of a given name.",
+		}, []string{"listener_name"})
+
+	listenerClosedTotal = prom.NewCounterVec(
+		prom.CounterOpts{
+			Namespace: "net",
+			Subsystem: "conntrack",
+			Name: "listener_conn_closed_total",
+			Help: "Total number of connections closed that were made to the listener of a given name.",
+		}, []string{"listener_name"})
+)
+
+func init() {
+	prom.MustRegister(listenerAcceptedTotal)
+	prom.MustRegister(listenerClosedTotal)
+}
+
+// preRegisterListenerMetrics pre-populates Prometheus labels for the given listener name, to avoid Prometheus missing labels issue.
+func preRegisterListenerMetrics(listenerName string) {
+	listenerAcceptedTotal.WithLabelValues(listenerName)
+	listenerClosedTotal.WithLabelValues(listenerName)
+}
+
+// reportListenerConnAccepted counts one accepted connection for the named listener.
+func reportListenerConnAccepted(listenerName string) {
+	listenerAcceptedTotal.WithLabelValues(listenerName).Inc()
+}
+
+// reportListenerConnClosed counts one closed connection for the named listener.
+func reportListenerConnClosed(listenerName string) {
+	listenerClosedTotal.WithLabelValues(listenerName).Inc()
+}
diff --git a/vendor/github.com/mwitkow/go-conntrack/listener_wrapper.go b/vendor/github.com/mwitkow/go-conntrack/listener_wrapper.go
new file mode 100644
index 0000000000..702fe25577
--- /dev/null
+++ b/vendor/github.com/mwitkow/go-conntrack/listener_wrapper.go
@@ -0,0 +1,158 @@
+// Copyright 2016 Michal Witkowski. All Rights Reserved.
+// See LICENSE for licensing terms. 
+ +package conntrack + +import ( + "fmt" + "net" + "sync" + "time" + + "github.com/jpillora/backoff" + "golang.org/x/net/trace" +) + +const ( + defaultName = "default" +) + +type listenerOpts struct { + name string + monitoring bool + tracing bool + tcpKeepAlive time.Duration + retryBackoff *backoff.Backoff +} + +type listenerOpt func(*listenerOpts) + +// TrackWithName sets the name of the Listener for use in tracking and monitoring. +func TrackWithName(name string) listenerOpt { + return func(opts *listenerOpts) { + opts.name = name + } +} + +// TrackWithoutMonitoring turns *off* Prometheus monitoring for this listener. +func TrackWithoutMonitoring() listenerOpt { + return func(opts *listenerOpts) { + opts.monitoring = false + } +} + +// TrackWithTracing turns *on* the /debug/events tracing of the live listener connections. +func TrackWithTracing() listenerOpt { + return func(opts *listenerOpts) { + opts.tracing = true + } +} + +// TrackWithRetries enables retrying of temporary Accept() errors, with the given backoff between attempts. +// Concurrent accept calls that receive temporary errors have independent backoff scaling. +func TrackWithRetries(b backoff.Backoff) listenerOpt { + return func(opts *listenerOpts) { + opts.retryBackoff = &b + } +} + +// TrackWithTcpKeepAlive makes sure that any `net.TCPConn` that get accepted have a keep-alive. +// This is useful for HTTP servers in order for, for example laptops, to not use up resources on the +// server while they don't utilise their connection. +// A value of 0 disables it. +func TrackWithTcpKeepAlive(keepalive time.Duration) listenerOpt { + return func(opts *listenerOpts) { + opts.tcpKeepAlive = keepalive + } +} + +type connTrackListener struct { + net.Listener + opts *listenerOpts +} + +// NewListener returns the given listener wrapped in connection tracking listener. 
+func NewListener(inner net.Listener, optFuncs ...listenerOpt) net.Listener { + opts := &listenerOpts{ + name: defaultName, + monitoring: true, + tracing: false, + } + for _, f := range optFuncs { + f(opts) + } + if opts.monitoring { + preRegisterListenerMetrics(opts.name) + } + return &connTrackListener{ + Listener: inner, + opts: opts, + } +} + +func (ct *connTrackListener) Accept() (net.Conn, error) { + // TODO(mwitkow): Add monitoring of failed accept. + var ( + conn net.Conn + err error + ) + for attempt := 0; ; attempt++ { + conn, err = ct.Listener.Accept() + if err == nil || ct.opts.retryBackoff == nil { + break + } + if t, ok := err.(interface{ Temporary() bool }); !ok || !t.Temporary() { + break + } + time.Sleep(ct.opts.retryBackoff.ForAttempt(float64(attempt))) + } + if err != nil { + return nil, err + } + if tcpConn, ok := conn.(*net.TCPConn); ok && ct.opts.tcpKeepAlive > 0 { + tcpConn.SetKeepAlive(true) + tcpConn.SetKeepAlivePeriod(ct.opts.tcpKeepAlive) + } + return newServerConnTracker(conn, ct.opts), nil +} + +type serverConnTracker struct { + net.Conn + opts *listenerOpts + event trace.EventLog + mu sync.Mutex +} + +func newServerConnTracker(inner net.Conn, opts *listenerOpts) net.Conn { + tracker := &serverConnTracker{ + Conn: inner, + opts: opts, + } + if opts.tracing { + tracker.event = trace.NewEventLog(fmt.Sprintf("net.ServerConn.%s", opts.name), fmt.Sprintf("%v", inner.RemoteAddr())) + tracker.event.Printf("accepted: %v -> %v", inner.RemoteAddr(), inner.LocalAddr()) + } + if opts.monitoring { + reportListenerConnAccepted(opts.name) + } + return tracker +} + +func (ct *serverConnTracker) Close() error { + err := ct.Conn.Close() + ct.mu.Lock() + if ct.event != nil { + if err != nil { + ct.event.Errorf("failed closing: %v", err) + } else { + ct.event.Printf("closing") + } + ct.event.Finish() + ct.event = nil + } + ct.mu.Unlock() + if ct.opts.monitoring { + reportListenerConnClosed(ct.opts.name) + } + return err +} diff --git 
a/vendor/github.com/prometheus/client_golang/api/client.go b/vendor/github.com/prometheus/client_golang/api/client.go new file mode 100644 index 0000000000..f7ca60b672 --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/api/client.go @@ -0,0 +1,129 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package api provides clients for the HTTP APIs. +package api + +import ( + "context" + "io/ioutil" + "net" + "net/http" + "net/url" + "path" + "strings" + "time" +) + +// DefaultRoundTripper is used if no RoundTripper is set in Config. +var DefaultRoundTripper http.RoundTripper = &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).DialContext, + TLSHandshakeTimeout: 10 * time.Second, +} + +// Config defines configuration parameters for a new client. +type Config struct { + // The address of the Prometheus to connect to. + Address string + + // RoundTripper is used by the Client to drive HTTP requests. If not + // provided, DefaultRoundTripper will be used. + RoundTripper http.RoundTripper +} + +func (cfg *Config) roundTripper() http.RoundTripper { + if cfg.RoundTripper == nil { + return DefaultRoundTripper + } + return cfg.RoundTripper +} + +// Client is the interface for an API client. 
+type Client interface { + URL(ep string, args map[string]string) *url.URL + Do(context.Context, *http.Request) (*http.Response, []byte, error) +} + +// NewClient returns a new Client. +// +// It is safe to use the returned Client from multiple goroutines. +func NewClient(cfg Config) (Client, error) { + u, err := url.Parse(cfg.Address) + if err != nil { + return nil, err + } + u.Path = strings.TrimRight(u.Path, "/") + + return &httpClient{ + endpoint: u, + client: http.Client{Transport: cfg.roundTripper()}, + }, nil +} + +type httpClient struct { + endpoint *url.URL + client http.Client +} + +func (c *httpClient) URL(ep string, args map[string]string) *url.URL { + p := path.Join(c.endpoint.Path, ep) + + for arg, val := range args { + arg = ":" + arg + p = strings.Replace(p, arg, val, -1) + } + + u := *c.endpoint + u.Path = p + + return &u +} + +func (c *httpClient) Do(ctx context.Context, req *http.Request) (*http.Response, []byte, error) { + if ctx != nil { + req = req.WithContext(ctx) + } + resp, err := c.client.Do(req) + defer func() { + if resp != nil { + resp.Body.Close() + } + }() + + if err != nil { + return nil, nil, err + } + + var body []byte + done := make(chan struct{}) + go func() { + body, err = ioutil.ReadAll(resp.Body) + close(done) + }() + + select { + case <-ctx.Done(): + <-done + err = resp.Body.Close() + if err == nil { + err = ctx.Err() + } + case <-done: + } + + return resp, body, err +} diff --git a/vendor/github.com/prometheus/client_golang/api/prometheus/v1/api.go b/vendor/github.com/prometheus/client_golang/api/prometheus/v1/api.go new file mode 100644 index 0000000000..0c8de071cb --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/api/prometheus/v1/api.go @@ -0,0 +1,1126 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package v1 provides bindings to the Prometheus HTTP API v1: +// http://prometheus.io/docs/querying/api/ +package v1 + +import ( + "context" + "errors" + "fmt" + "math" + "net/http" + "net/url" + "strconv" + "strings" + "time" + "unsafe" + + json "github.com/json-iterator/go" + + "github.com/prometheus/common/model" + + "github.com/prometheus/client_golang/api" +) + +func init() { + json.RegisterTypeEncoderFunc("model.SamplePair", marshalPointJSON, marshalPointJSONIsEmpty) + json.RegisterTypeDecoderFunc("model.SamplePair", unMarshalPointJSON) +} + +func unMarshalPointJSON(ptr unsafe.Pointer, iter *json.Iterator) { + p := (*model.SamplePair)(ptr) + if !iter.ReadArray() { + iter.ReportError("unmarshal model.SamplePair", "SamplePair must be [timestamp, value]") + return + } + t := iter.ReadNumber() + if err := p.Timestamp.UnmarshalJSON([]byte(t)); err != nil { + iter.ReportError("unmarshal model.SamplePair", err.Error()) + return + } + if !iter.ReadArray() { + iter.ReportError("unmarshal model.SamplePair", "SamplePair missing value") + return + } + + f, err := strconv.ParseFloat(iter.ReadString(), 64) + if err != nil { + iter.ReportError("unmarshal model.SamplePair", err.Error()) + return + } + p.Value = model.SampleValue(f) + + if iter.ReadArray() { + iter.ReportError("unmarshal model.SamplePair", "SamplePair has too many values, must be [timestamp, value]") + return + } +} + +func marshalPointJSON(ptr unsafe.Pointer, stream *json.Stream) { + p := *((*model.SamplePair)(ptr)) + stream.WriteArrayStart() + // Write out the timestamp as a float 
divided by 1000. + // This is ~3x faster than converting to a float. + t := int64(p.Timestamp) + if t < 0 { + stream.WriteRaw(`-`) + t = -t + } + stream.WriteInt64(t / 1000) + fraction := t % 1000 + if fraction != 0 { + stream.WriteRaw(`.`) + if fraction < 100 { + stream.WriteRaw(`0`) + } + if fraction < 10 { + stream.WriteRaw(`0`) + } + stream.WriteInt64(fraction) + } + stream.WriteMore() + stream.WriteRaw(`"`) + + // Taken from https://github.com/json-iterator/go/blob/master/stream_float.go#L71 as a workaround + // to https://github.com/json-iterator/go/issues/365 (jsoniter, to follow json standard, doesn't allow inf/nan) + buf := stream.Buffer() + abs := math.Abs(float64(p.Value)) + fmt := byte('f') + // Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right. + if abs != 0 { + if abs < 1e-6 || abs >= 1e21 { + fmt = 'e' + } + } + buf = strconv.AppendFloat(buf, float64(p.Value), fmt, -1, 64) + stream.SetBuffer(buf) + + stream.WriteRaw(`"`) + stream.WriteArrayEnd() + +} + +func marshalPointJSONIsEmpty(ptr unsafe.Pointer) bool { + return false +} + +const ( + apiPrefix = "/api/v1" + + epAlerts = apiPrefix + "/alerts" + epAlertManagers = apiPrefix + "/alertmanagers" + epQuery = apiPrefix + "/query" + epQueryRange = apiPrefix + "/query_range" + epQueryExemplars = apiPrefix + "/query_exemplars" + epLabels = apiPrefix + "/labels" + epLabelValues = apiPrefix + "/label/:name/values" + epSeries = apiPrefix + "/series" + epTargets = apiPrefix + "/targets" + epTargetsMetadata = apiPrefix + "/targets/metadata" + epMetadata = apiPrefix + "/metadata" + epRules = apiPrefix + "/rules" + epSnapshot = apiPrefix + "/admin/tsdb/snapshot" + epDeleteSeries = apiPrefix + "/admin/tsdb/delete_series" + epCleanTombstones = apiPrefix + "/admin/tsdb/clean_tombstones" + epConfig = apiPrefix + "/status/config" + epFlags = apiPrefix + "/status/flags" + epBuildinfo = apiPrefix + "/status/buildinfo" + epRuntimeinfo = apiPrefix + "/status/runtimeinfo" + epTSDB 
= apiPrefix + "/status/tsdb" +) + +// AlertState models the state of an alert. +type AlertState string + +// ErrorType models the different API error types. +type ErrorType string + +// HealthStatus models the health status of a scrape target. +type HealthStatus string + +// RuleType models the type of a rule. +type RuleType string + +// RuleHealth models the health status of a rule. +type RuleHealth string + +// MetricType models the type of a metric. +type MetricType string + +const ( + // Possible values for AlertState. + AlertStateFiring AlertState = "firing" + AlertStateInactive AlertState = "inactive" + AlertStatePending AlertState = "pending" + + // Possible values for ErrorType. + ErrBadData ErrorType = "bad_data" + ErrTimeout ErrorType = "timeout" + ErrCanceled ErrorType = "canceled" + ErrExec ErrorType = "execution" + ErrBadResponse ErrorType = "bad_response" + ErrServer ErrorType = "server_error" + ErrClient ErrorType = "client_error" + + // Possible values for HealthStatus. + HealthGood HealthStatus = "up" + HealthUnknown HealthStatus = "unknown" + HealthBad HealthStatus = "down" + + // Possible values for RuleType. + RuleTypeRecording RuleType = "recording" + RuleTypeAlerting RuleType = "alerting" + + // Possible values for RuleHealth. + RuleHealthGood = "ok" + RuleHealthUnknown = "unknown" + RuleHealthBad = "err" + + // Possible values for MetricType + MetricTypeCounter MetricType = "counter" + MetricTypeGauge MetricType = "gauge" + MetricTypeHistogram MetricType = "histogram" + MetricTypeGaugeHistogram MetricType = "gaugehistogram" + MetricTypeSummary MetricType = "summary" + MetricTypeInfo MetricType = "info" + MetricTypeStateset MetricType = "stateset" + MetricTypeUnknown MetricType = "unknown" +) + +// Error is an error returned by the API. +type Error struct { + Type ErrorType + Msg string + Detail string +} + +func (e *Error) Error() string { + return fmt.Sprintf("%s: %s", e.Type, e.Msg) +} + +// Range represents a sliced time range. 
+type Range struct { + // The boundaries of the time range. + Start, End time.Time + // The maximum time between two slices within the boundaries. + Step time.Duration +} + +// API provides bindings for Prometheus's v1 API. +type API interface { + // Alerts returns a list of all active alerts. + Alerts(ctx context.Context) (AlertsResult, error) + // AlertManagers returns an overview of the current state of the Prometheus alert manager discovery. + AlertManagers(ctx context.Context) (AlertManagersResult, error) + // CleanTombstones removes the deleted data from disk and cleans up the existing tombstones. + CleanTombstones(ctx context.Context) error + // Config returns the current Prometheus configuration. + Config(ctx context.Context) (ConfigResult, error) + // DeleteSeries deletes data for a selection of series in a time range. + DeleteSeries(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) error + // Flags returns the flag values that Prometheus was launched with. + Flags(ctx context.Context) (FlagsResult, error) + // LabelNames returns the unique label names present in the block in sorted order by given time range and matchers. + LabelNames(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) ([]string, Warnings, error) + // LabelValues performs a query for the values of the given label, time range and matchers. + LabelValues(ctx context.Context, label string, matches []string, startTime time.Time, endTime time.Time) (model.LabelValues, Warnings, error) + // Query performs a query for the given time. + Query(ctx context.Context, query string, ts time.Time) (model.Value, Warnings, error) + // QueryRange performs a query for the given range. + QueryRange(ctx context.Context, query string, r Range) (model.Value, Warnings, error) + // QueryExemplars performs a query for exemplars by the given query and time range. 
+ QueryExemplars(ctx context.Context, query string, startTime time.Time, endTime time.Time) ([]ExemplarQueryResult, error) + // Buildinfo returns various build information properties about the Prometheus server + Buildinfo(ctx context.Context) (BuildinfoResult, error) + // Runtimeinfo returns the various runtime information properties about the Prometheus server. + Runtimeinfo(ctx context.Context) (RuntimeinfoResult, error) + // Series finds series by label matchers. + Series(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) ([]model.LabelSet, Warnings, error) + // Snapshot creates a snapshot of all current data into snapshots/- + // under the TSDB's data directory and returns the directory as response. + Snapshot(ctx context.Context, skipHead bool) (SnapshotResult, error) + // Rules returns a list of alerting and recording rules that are currently loaded. + Rules(ctx context.Context) (RulesResult, error) + // Targets returns an overview of the current state of the Prometheus target discovery. + Targets(ctx context.Context) (TargetsResult, error) + // TargetsMetadata returns metadata about metrics currently scraped by the target. + TargetsMetadata(ctx context.Context, matchTarget string, metric string, limit string) ([]MetricMetadata, error) + // Metadata returns metadata about metrics currently scraped by the metric name. + Metadata(ctx context.Context, metric string, limit string) (map[string][]Metadata, error) + // TSDB returns the cardinality statistics. + TSDB(ctx context.Context) (TSDBResult, error) +} + +// AlertsResult contains the result from querying the alerts endpoint. +type AlertsResult struct { + Alerts []Alert `json:"alerts"` +} + +// AlertManagersResult contains the result from querying the alertmanagers endpoint. +type AlertManagersResult struct { + Active []AlertManager `json:"activeAlertManagers"` + Dropped []AlertManager `json:"droppedAlertManagers"` +} + +// AlertManager models a configured Alert Manager. 
+type AlertManager struct { + URL string `json:"url"` +} + +// ConfigResult contains the result from querying the config endpoint. +type ConfigResult struct { + YAML string `json:"yaml"` +} + +// FlagsResult contains the result from querying the flag endpoint. +type FlagsResult map[string]string + +// BuildinfoResult contains the results from querying the buildinfo endpoint. +type BuildinfoResult struct { + Version string `json:"version"` + Revision string `json:"revision"` + Branch string `json:"branch"` + BuildUser string `json:"buildUser"` + BuildDate string `json:"buildDate"` + GoVersion string `json:"goVersion"` +} + +// RuntimeinfoResult contains the result from querying the runtimeinfo endpoint. +type RuntimeinfoResult struct { + StartTime time.Time `json:"startTime"` + CWD string `json:"CWD"` + ReloadConfigSuccess bool `json:"reloadConfigSuccess"` + LastConfigTime time.Time `json:"lastConfigTime"` + ChunkCount int `json:"chunkCount"` + TimeSeriesCount int `json:"timeSeriesCount"` + CorruptionCount int `json:"corruptionCount"` + GoroutineCount int `json:"goroutineCount"` + GOMAXPROCS int `json:"GOMAXPROCS"` + GOGC string `json:"GOGC"` + GODEBUG string `json:"GODEBUG"` + StorageRetention string `json:"storageRetention"` +} + +// SnapshotResult contains the result from querying the snapshot endpoint. +type SnapshotResult struct { + Name string `json:"name"` +} + +// RulesResult contains the result from querying the rules endpoint. +type RulesResult struct { + Groups []RuleGroup `json:"groups"` +} + +// RuleGroup models a rule group that contains a set of recording and alerting rules. +type RuleGroup struct { + Name string `json:"name"` + File string `json:"file"` + Interval float64 `json:"interval"` + Rules Rules `json:"rules"` +} + +// Recording and alerting rules are stored in the same slice to preserve the order +// that rules are returned in by the API. 
+// +// Rule types can be determined using a type switch: +// switch v := rule.(type) { +// case RecordingRule: +// fmt.Print("got a recording rule") +// case AlertingRule: +// fmt.Print("got a alerting rule") +// default: +// fmt.Printf("unknown rule type %s", v) +// } +type Rules []interface{} + +// AlertingRule models a alerting rule. +type AlertingRule struct { + Name string `json:"name"` + Query string `json:"query"` + Duration float64 `json:"duration"` + Labels model.LabelSet `json:"labels"` + Annotations model.LabelSet `json:"annotations"` + Alerts []*Alert `json:"alerts"` + Health RuleHealth `json:"health"` + LastError string `json:"lastError,omitempty"` + EvaluationTime float64 `json:"evaluationTime"` + LastEvaluation time.Time `json:"lastEvaluation"` + State string `json:"state"` +} + +// RecordingRule models a recording rule. +type RecordingRule struct { + Name string `json:"name"` + Query string `json:"query"` + Labels model.LabelSet `json:"labels,omitempty"` + Health RuleHealth `json:"health"` + LastError string `json:"lastError,omitempty"` + EvaluationTime float64 `json:"evaluationTime"` + LastEvaluation time.Time `json:"lastEvaluation"` +} + +// Alert models an active alert. +type Alert struct { + ActiveAt time.Time `json:"activeAt"` + Annotations model.LabelSet + Labels model.LabelSet + State AlertState + Value string +} + +// TargetsResult contains the result from querying the targets endpoint. +type TargetsResult struct { + Active []ActiveTarget `json:"activeTargets"` + Dropped []DroppedTarget `json:"droppedTargets"` +} + +// ActiveTarget models an active Prometheus scrape target. 
+type ActiveTarget struct { + DiscoveredLabels map[string]string `json:"discoveredLabels"` + Labels model.LabelSet `json:"labels"` + ScrapePool string `json:"scrapePool"` + ScrapeURL string `json:"scrapeUrl"` + GlobalURL string `json:"globalUrl"` + LastError string `json:"lastError"` + LastScrape time.Time `json:"lastScrape"` + LastScrapeDuration float64 `json:"lastScrapeDuration"` + Health HealthStatus `json:"health"` +} + +// DroppedTarget models a dropped Prometheus scrape target. +type DroppedTarget struct { + DiscoveredLabels map[string]string `json:"discoveredLabels"` +} + +// MetricMetadata models the metadata of a metric with its scrape target and name. +type MetricMetadata struct { + Target map[string]string `json:"target"` + Metric string `json:"metric,omitempty"` + Type MetricType `json:"type"` + Help string `json:"help"` + Unit string `json:"unit"` +} + +// Metadata models the metadata of a metric. +type Metadata struct { + Type MetricType `json:"type"` + Help string `json:"help"` + Unit string `json:"unit"` +} + +// queryResult contains result data for a query. +type queryResult struct { + Type model.ValueType `json:"resultType"` + Result interface{} `json:"result"` + + // The decoded value. + v model.Value +} + +// TSDBResult contains the result from querying the tsdb endpoint. +type TSDBResult struct { + SeriesCountByMetricName []Stat `json:"seriesCountByMetricName"` + LabelValueCountByLabelName []Stat `json:"labelValueCountByLabelName"` + MemoryInBytesByLabelName []Stat `json:"memoryInBytesByLabelName"` + SeriesCountByLabelValuePair []Stat `json:"seriesCountByLabelValuePair"` +} + +// Stat models information about statistic value. 
+type Stat struct { + Name string `json:"name"` + Value uint64 `json:"value"` +} + +func (rg *RuleGroup) UnmarshalJSON(b []byte) error { + v := struct { + Name string `json:"name"` + File string `json:"file"` + Interval float64 `json:"interval"` + Rules []json.RawMessage `json:"rules"` + }{} + + if err := json.Unmarshal(b, &v); err != nil { + return err + } + + rg.Name = v.Name + rg.File = v.File + rg.Interval = v.Interval + + for _, rule := range v.Rules { + alertingRule := AlertingRule{} + if err := json.Unmarshal(rule, &alertingRule); err == nil { + rg.Rules = append(rg.Rules, alertingRule) + continue + } + recordingRule := RecordingRule{} + if err := json.Unmarshal(rule, &recordingRule); err == nil { + rg.Rules = append(rg.Rules, recordingRule) + continue + } + return errors.New("failed to decode JSON into an alerting or recording rule") + } + + return nil +} + +func (r *AlertingRule) UnmarshalJSON(b []byte) error { + v := struct { + Type string `json:"type"` + }{} + if err := json.Unmarshal(b, &v); err != nil { + return err + } + if v.Type == "" { + return errors.New("type field not present in rule") + } + if v.Type != string(RuleTypeAlerting) { + return fmt.Errorf("expected rule of type %s but got %s", string(RuleTypeAlerting), v.Type) + } + + rule := struct { + Name string `json:"name"` + Query string `json:"query"` + Duration float64 `json:"duration"` + Labels model.LabelSet `json:"labels"` + Annotations model.LabelSet `json:"annotations"` + Alerts []*Alert `json:"alerts"` + Health RuleHealth `json:"health"` + LastError string `json:"lastError,omitempty"` + EvaluationTime float64 `json:"evaluationTime"` + LastEvaluation time.Time `json:"lastEvaluation"` + State string `json:"state"` + }{} + if err := json.Unmarshal(b, &rule); err != nil { + return err + } + r.Health = rule.Health + r.Annotations = rule.Annotations + r.Name = rule.Name + r.Query = rule.Query + r.Alerts = rule.Alerts + r.Duration = rule.Duration + r.Labels = rule.Labels + r.LastError = 
rule.LastError + r.EvaluationTime = rule.EvaluationTime + r.LastEvaluation = rule.LastEvaluation + r.State = rule.State + + return nil +} + +func (r *RecordingRule) UnmarshalJSON(b []byte) error { + v := struct { + Type string `json:"type"` + }{} + if err := json.Unmarshal(b, &v); err != nil { + return err + } + if v.Type == "" { + return errors.New("type field not present in rule") + } + if v.Type != string(RuleTypeRecording) { + return fmt.Errorf("expected rule of type %s but got %s", string(RuleTypeRecording), v.Type) + } + + rule := struct { + Name string `json:"name"` + Query string `json:"query"` + Labels model.LabelSet `json:"labels,omitempty"` + Health RuleHealth `json:"health"` + LastError string `json:"lastError,omitempty"` + EvaluationTime float64 `json:"evaluationTime"` + LastEvaluation time.Time `json:"lastEvaluation"` + }{} + if err := json.Unmarshal(b, &rule); err != nil { + return err + } + r.Health = rule.Health + r.Labels = rule.Labels + r.Name = rule.Name + r.LastError = rule.LastError + r.Query = rule.Query + r.EvaluationTime = rule.EvaluationTime + r.LastEvaluation = rule.LastEvaluation + + return nil +} + +func (qr *queryResult) UnmarshalJSON(b []byte) error { + v := struct { + Type model.ValueType `json:"resultType"` + Result json.RawMessage `json:"result"` + }{} + + err := json.Unmarshal(b, &v) + if err != nil { + return err + } + + switch v.Type { + case model.ValScalar: + var sv model.Scalar + err = json.Unmarshal(v.Result, &sv) + qr.v = &sv + + case model.ValVector: + var vv model.Vector + err = json.Unmarshal(v.Result, &vv) + qr.v = vv + + case model.ValMatrix: + var mv model.Matrix + err = json.Unmarshal(v.Result, &mv) + qr.v = mv + + default: + err = fmt.Errorf("unexpected value type %q", v.Type) + } + return err +} + +// Exemplar is additional information associated with a time series. 
+type Exemplar struct { + Labels model.LabelSet `json:"labels"` + Value model.SampleValue `json:"value"` + Timestamp model.Time `json:"timestamp"` +} + +type ExemplarQueryResult struct { + SeriesLabels model.LabelSet `json:"seriesLabels"` + Exemplars []Exemplar `json:"exemplars"` +} + +// NewAPI returns a new API for the client. +// +// It is safe to use the returned API from multiple goroutines. +func NewAPI(c api.Client) API { + return &httpAPI{ + client: &apiClientImpl{ + client: c, + }, + } +} + +type httpAPI struct { + client apiClient +} + +func (h *httpAPI) Alerts(ctx context.Context) (AlertsResult, error) { + u := h.client.URL(epAlerts, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return AlertsResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return AlertsResult{}, err + } + + var res AlertsResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) AlertManagers(ctx context.Context) (AlertManagersResult, error) { + u := h.client.URL(epAlertManagers, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return AlertManagersResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return AlertManagersResult{}, err + } + + var res AlertManagersResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) CleanTombstones(ctx context.Context) error { + u := h.client.URL(epCleanTombstones, nil) + + req, err := http.NewRequest(http.MethodPost, u.String(), nil) + if err != nil { + return err + } + + _, _, _, err = h.client.Do(ctx, req) + return err +} + +func (h *httpAPI) Config(ctx context.Context) (ConfigResult, error) { + u := h.client.URL(epConfig, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return ConfigResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return ConfigResult{}, err + } + + var res ConfigResult + return res, 
json.Unmarshal(body, &res) +} + +func (h *httpAPI) DeleteSeries(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) error { + u := h.client.URL(epDeleteSeries, nil) + q := u.Query() + + for _, m := range matches { + q.Add("match[]", m) + } + + q.Set("start", formatTime(startTime)) + q.Set("end", formatTime(endTime)) + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodPost, u.String(), nil) + if err != nil { + return err + } + + _, _, _, err = h.client.Do(ctx, req) + return err +} + +func (h *httpAPI) Flags(ctx context.Context) (FlagsResult, error) { + u := h.client.URL(epFlags, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return FlagsResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return FlagsResult{}, err + } + + var res FlagsResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) Buildinfo(ctx context.Context) (BuildinfoResult, error) { + u := h.client.URL(epBuildinfo, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return BuildinfoResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return BuildinfoResult{}, err + } + + var res BuildinfoResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) Runtimeinfo(ctx context.Context) (RuntimeinfoResult, error) { + u := h.client.URL(epRuntimeinfo, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return RuntimeinfoResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return RuntimeinfoResult{}, err + } + + var res RuntimeinfoResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) LabelNames(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) ([]string, Warnings, error) { + u := h.client.URL(epLabels, nil) + q := u.Query() + q.Set("start", formatTime(startTime)) + q.Set("end", 
formatTime(endTime)) + for _, m := range matches { + q.Add("match[]", m) + } + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, nil, err + } + _, body, w, err := h.client.Do(ctx, req) + if err != nil { + return nil, w, err + } + var labelNames []string + return labelNames, w, json.Unmarshal(body, &labelNames) +} + +func (h *httpAPI) LabelValues(ctx context.Context, label string, matches []string, startTime time.Time, endTime time.Time) (model.LabelValues, Warnings, error) { + u := h.client.URL(epLabelValues, map[string]string{"name": label}) + q := u.Query() + q.Set("start", formatTime(startTime)) + q.Set("end", formatTime(endTime)) + for _, m := range matches { + q.Add("match[]", m) + } + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, nil, err + } + _, body, w, err := h.client.Do(ctx, req) + if err != nil { + return nil, w, err + } + var labelValues model.LabelValues + return labelValues, w, json.Unmarshal(body, &labelValues) +} + +func (h *httpAPI) Query(ctx context.Context, query string, ts time.Time) (model.Value, Warnings, error) { + u := h.client.URL(epQuery, nil) + q := u.Query() + + q.Set("query", query) + if !ts.IsZero() { + q.Set("time", formatTime(ts)) + } + + _, body, warnings, err := h.client.DoGetFallback(ctx, u, q) + if err != nil { + return nil, warnings, err + } + + var qres queryResult + return model.Value(qres.v), warnings, json.Unmarshal(body, &qres) +} + +func (h *httpAPI) QueryRange(ctx context.Context, query string, r Range) (model.Value, Warnings, error) { + u := h.client.URL(epQueryRange, nil) + q := u.Query() + + q.Set("query", query) + q.Set("start", formatTime(r.Start)) + q.Set("end", formatTime(r.End)) + q.Set("step", strconv.FormatFloat(r.Step.Seconds(), 'f', -1, 64)) + + _, body, warnings, err := h.client.DoGetFallback(ctx, u, q) + if err != nil { + return nil, warnings, err + } + + var 
qres queryResult + + return model.Value(qres.v), warnings, json.Unmarshal(body, &qres) +} + +func (h *httpAPI) Series(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) ([]model.LabelSet, Warnings, error) { + u := h.client.URL(epSeries, nil) + q := u.Query() + + for _, m := range matches { + q.Add("match[]", m) + } + + q.Set("start", formatTime(startTime)) + q.Set("end", formatTime(endTime)) + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, nil, err + } + + _, body, warnings, err := h.client.Do(ctx, req) + if err != nil { + return nil, warnings, err + } + + var mset []model.LabelSet + return mset, warnings, json.Unmarshal(body, &mset) +} + +func (h *httpAPI) Snapshot(ctx context.Context, skipHead bool) (SnapshotResult, error) { + u := h.client.URL(epSnapshot, nil) + q := u.Query() + + q.Set("skip_head", strconv.FormatBool(skipHead)) + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodPost, u.String(), nil) + if err != nil { + return SnapshotResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return SnapshotResult{}, err + } + + var res SnapshotResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) Rules(ctx context.Context) (RulesResult, error) { + u := h.client.URL(epRules, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return RulesResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return RulesResult{}, err + } + + var res RulesResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) Targets(ctx context.Context) (TargetsResult, error) { + u := h.client.URL(epTargets, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return TargetsResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return TargetsResult{}, err + } + + var res TargetsResult + 
return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) TargetsMetadata(ctx context.Context, matchTarget string, metric string, limit string) ([]MetricMetadata, error) { + u := h.client.URL(epTargetsMetadata, nil) + q := u.Query() + + q.Set("match_target", matchTarget) + q.Set("metric", metric) + q.Set("limit", limit) + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return nil, err + } + + var res []MetricMetadata + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) Metadata(ctx context.Context, metric string, limit string) (map[string][]Metadata, error) { + u := h.client.URL(epMetadata, nil) + q := u.Query() + + q.Set("metric", metric) + q.Set("limit", limit) + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return nil, err + } + + var res map[string][]Metadata + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) TSDB(ctx context.Context) (TSDBResult, error) { + u := h.client.URL(epTSDB, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return TSDBResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return TSDBResult{}, err + } + + var res TSDBResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) QueryExemplars(ctx context.Context, query string, startTime time.Time, endTime time.Time) ([]ExemplarQueryResult, error) { + u := h.client.URL(epQueryExemplars, nil) + q := u.Query() + + q.Set("query", query) + q.Set("start", formatTime(startTime)) + q.Set("end", formatTime(endTime)) + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, err + } + + _, body, _, err := h.client.Do(ctx, req) + if 
err != nil { + return nil, err + } + + var res []ExemplarQueryResult + return res, json.Unmarshal(body, &res) +} + +// Warnings is an array of non critical errors +type Warnings []string + +// apiClient wraps a regular client and processes successful API responses. +// Successful also includes responses that errored at the API level. +type apiClient interface { + URL(ep string, args map[string]string) *url.URL + Do(context.Context, *http.Request) (*http.Response, []byte, Warnings, error) + DoGetFallback(ctx context.Context, u *url.URL, args url.Values) (*http.Response, []byte, Warnings, error) +} + +type apiClientImpl struct { + client api.Client +} + +type apiResponse struct { + Status string `json:"status"` + Data json.RawMessage `json:"data"` + ErrorType ErrorType `json:"errorType"` + Error string `json:"error"` + Warnings []string `json:"warnings,omitempty"` +} + +func apiError(code int) bool { + // These are the codes that Prometheus sends when it returns an error. + return code == http.StatusUnprocessableEntity || code == http.StatusBadRequest +} + +func errorTypeAndMsgFor(resp *http.Response) (ErrorType, string) { + switch resp.StatusCode / 100 { + case 4: + return ErrClient, fmt.Sprintf("client error: %d", resp.StatusCode) + case 5: + return ErrServer, fmt.Sprintf("server error: %d", resp.StatusCode) + } + return ErrBadResponse, fmt.Sprintf("bad response code %d", resp.StatusCode) +} + +func (h *apiClientImpl) URL(ep string, args map[string]string) *url.URL { + return h.client.URL(ep, args) +} + +func (h *apiClientImpl) Do(ctx context.Context, req *http.Request) (*http.Response, []byte, Warnings, error) { + resp, body, err := h.client.Do(ctx, req) + if err != nil { + return resp, body, nil, err + } + + code := resp.StatusCode + + if code/100 != 2 && !apiError(code) { + errorType, errorMsg := errorTypeAndMsgFor(resp) + return resp, body, nil, &Error{ + Type: errorType, + Msg: errorMsg, + Detail: string(body), + } + } + + var result apiResponse + + if 
http.StatusNoContent != code { + if jsonErr := json.Unmarshal(body, &result); jsonErr != nil { + return resp, body, nil, &Error{ + Type: ErrBadResponse, + Msg: jsonErr.Error(), + } + } + } + + if apiError(code) && result.Status == "success" { + err = &Error{ + Type: ErrBadResponse, + Msg: "inconsistent body for response code", + } + } + + if result.Status == "error" { + err = &Error{ + Type: result.ErrorType, + Msg: result.Error, + } + } + + return resp, []byte(result.Data), result.Warnings, err + +} + +// DoGetFallback will attempt to do the request as-is, and on a 405 or 501 it +// will fallback to a GET request. +func (h *apiClientImpl) DoGetFallback(ctx context.Context, u *url.URL, args url.Values) (*http.Response, []byte, Warnings, error) { + req, err := http.NewRequest(http.MethodPost, u.String(), strings.NewReader(args.Encode())) + if err != nil { + return nil, nil, nil, err + } + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + + resp, body, warnings, err := h.Do(ctx, req) + if resp != nil && (resp.StatusCode == http.StatusMethodNotAllowed || resp.StatusCode == http.StatusNotImplemented) { + u.RawQuery = args.Encode() + req, err = http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, nil, warnings, err + } + + } else { + if err != nil { + return resp, body, warnings, err + } + return resp, body, warnings, nil + } + return h.Do(ctx, req) +} + +func formatTime(t time.Time) string { + return strconv.FormatFloat(float64(t.Unix())+float64(t.Nanosecond())/1e9, 'f', -1, 64) +} diff --git a/vendor/github.com/prometheus/common/config/config.go b/vendor/github.com/prometheus/common/config/config.go new file mode 100644 index 0000000000..fffda4a7ef --- /dev/null +++ b/vendor/github.com/prometheus/common/config/config.go @@ -0,0 +1,66 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This package no longer handles safe yaml parsing. In order to
+// ensure correct yaml unmarshalling, use "yaml.UnmarshalStrict()".
+
+package config
+
+import (
+	"encoding/json"
+	"path/filepath"
+)
+
+const secretToken = "" // NOTE(review): upstream prometheus/common masks secrets with a non-empty placeholder token here; confirm the empty string was not lost when vendoring.
+
+// Secret is a special type for storing secrets.
+type Secret string
+
+// MarshalYAML implements the yaml.Marshaler interface for Secrets.
+func (s Secret) MarshalYAML() (interface{}, error) {
+	if s != "" {
+		return secretToken, nil
+	}
+	return nil, nil
+}
+
+// UnmarshalYAML implements the yaml.Unmarshaler interface for Secrets.
+func (s *Secret) UnmarshalYAML(unmarshal func(interface{}) error) error {
+	type plain Secret
+	return unmarshal((*plain)(s))
+}
+
+// MarshalJSON implements the json.Marshaler interface for Secret.
+func (s Secret) MarshalJSON() ([]byte, error) {
+	if len(s) == 0 {
+		return json.Marshal("")
+	}
+	return json.Marshal(secretToken)
+}
+
+// DirectorySetter is a config type that contains file paths that may
+// be relative to the file containing the config.
+type DirectorySetter interface {
+	// SetDirectory joins any relative file paths with dir.
+	// Any paths that are empty or absolute remain unchanged.
+	SetDirectory(dir string)
+}
+
+// JoinDir joins dir and path if path is relative.
+// If path is empty or absolute, it is returned unchanged.
+func JoinDir(dir, path string) string { + if path == "" || filepath.IsAbs(path) { + return path + } + return filepath.Join(dir, path) +} diff --git a/vendor/github.com/prometheus/common/config/http_config.go b/vendor/github.com/prometheus/common/config/http_config.go new file mode 100644 index 0000000000..350baf7548 --- /dev/null +++ b/vendor/github.com/prometheus/common/config/http_config.go @@ -0,0 +1,804 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build go1.8 + +package config + +import ( + "bytes" + "context" + "crypto/sha256" + "crypto/tls" + "crypto/x509" + "encoding/json" + "fmt" + "io/ioutil" + "net" + "net/http" + "net/url" + "strings" + "sync" + "time" + + "github.com/mwitkow/go-conntrack" + "golang.org/x/net/http2" + "golang.org/x/oauth2" + "golang.org/x/oauth2/clientcredentials" + "gopkg.in/yaml.v2" +) + +// DefaultHTTPClientConfig is the default HTTP client configuration. +var DefaultHTTPClientConfig = HTTPClientConfig{ + FollowRedirects: true, +} + +// defaultHTTPClientOptions holds the default HTTP client options. +var defaultHTTPClientOptions = httpClientOptions{ + keepAlivesEnabled: true, + http2Enabled: true, +} + +type closeIdler interface { + CloseIdleConnections() +} + +// BasicAuth contains basic HTTP authentication credentials. 
+type BasicAuth struct {
+	Username     string `yaml:"username" json:"username"`
+	Password     Secret `yaml:"password,omitempty" json:"password,omitempty"`
+	PasswordFile string `yaml:"password_file,omitempty" json:"password_file,omitempty"`
+}
+
+// SetDirectory joins any relative file paths with dir.
+func (a *BasicAuth) SetDirectory(dir string) {
+	if a == nil {
+		return
+	}
+	a.PasswordFile = JoinDir(dir, a.PasswordFile)
+}
+
+// Authorization contains HTTP authorization credentials.
+type Authorization struct {
+	Type            string `yaml:"type,omitempty" json:"type,omitempty"`
+	Credentials     Secret `yaml:"credentials,omitempty" json:"credentials,omitempty"`
+	CredentialsFile string `yaml:"credentials_file,omitempty" json:"credentials_file,omitempty"`
+}
+
+// SetDirectory joins any relative file paths with dir.
+func (a *Authorization) SetDirectory(dir string) {
+	if a == nil {
+		return
+	}
+	a.CredentialsFile = JoinDir(dir, a.CredentialsFile)
+}
+
+// URL is a custom URL type that allows validation at configuration load time.
+type URL struct {
+	*url.URL
+}
+
+// UnmarshalYAML implements the yaml.Unmarshaler interface for URLs.
+func (u *URL) UnmarshalYAML(unmarshal func(interface{}) error) error {
+	var s string
+	if err := unmarshal(&s); err != nil {
+		return err
+	}
+
+	urlp, err := url.Parse(s)
+	if err != nil {
+		return err
+	}
+	u.URL = urlp
+	return nil
+}
+
+// MarshalYAML implements the yaml.Marshaler interface for URLs.
+func (u URL) MarshalYAML() (interface{}, error) {
+	if u.URL != nil {
+		return u.String(), nil
+	}
+	return nil, nil
+}
+
+// UnmarshalJSON implements the json.Unmarshaler interface for URL.
+func (u *URL) UnmarshalJSON(data []byte) error {
+	var s string
+	if err := json.Unmarshal(data, &s); err != nil {
+		return err
+	}
+	urlp, err := url.Parse(s)
+	if err != nil {
+		return err
+	}
+	u.URL = urlp
+	return nil
+}
+
+// MarshalJSON implements the json.Marshaler interface for URL.
+func (u URL) MarshalJSON() ([]byte, error) { + if u.URL != nil { + return json.Marshal(u.URL.String()) + } + return nil, nil +} + +// OAuth2 is the oauth2 client configuration. +type OAuth2 struct { + ClientID string `yaml:"client_id" json:"client_id"` + ClientSecret Secret `yaml:"client_secret" json:"client_secret"` + ClientSecretFile string `yaml:"client_secret_file" json:"client_secret_file"` + Scopes []string `yaml:"scopes,omitempty" json:"scopes,omitempty"` + TokenURL string `yaml:"token_url" json:"token_url"` + EndpointParams map[string]string `yaml:"endpoint_params,omitempty" json:"endpoint_params,omitempty"` +} + +// SetDirectory joins any relative file paths with dir. +func (a *OAuth2) SetDirectory(dir string) { + if a == nil { + return + } + a.ClientSecretFile = JoinDir(dir, a.ClientSecretFile) +} + +// HTTPClientConfig configures an HTTP client. +type HTTPClientConfig struct { + // The HTTP basic authentication credentials for the targets. + BasicAuth *BasicAuth `yaml:"basic_auth,omitempty" json:"basic_auth,omitempty"` + // The HTTP authorization credentials for the targets. + Authorization *Authorization `yaml:"authorization,omitempty" json:"authorization,omitempty"` + // The OAuth2 client credentials used to fetch a token for the targets. + OAuth2 *OAuth2 `yaml:"oauth2,omitempty" json:"oauth2,omitempty"` + // The bearer token for the targets. Deprecated in favour of + // Authorization.Credentials. + BearerToken Secret `yaml:"bearer_token,omitempty" json:"bearer_token,omitempty"` + // The bearer token file for the targets. Deprecated in favour of + // Authorization.CredentialsFile. + BearerTokenFile string `yaml:"bearer_token_file,omitempty" json:"bearer_token_file,omitempty"` + // HTTP proxy server to use to connect to the targets. + ProxyURL URL `yaml:"proxy_url,omitempty" json:"proxy_url,omitempty"` + // TLSConfig to use to connect to the targets. 
+ TLSConfig TLSConfig `yaml:"tls_config,omitempty" json:"tls_config,omitempty"` + // FollowRedirects specifies whether the client should follow HTTP 3xx redirects. + // The omitempty flag is not set, because it would be hidden from the + // marshalled configuration when set to false. + FollowRedirects bool `yaml:"follow_redirects" json:"follow_redirects"` +} + +// SetDirectory joins any relative file paths with dir. +func (c *HTTPClientConfig) SetDirectory(dir string) { + if c == nil { + return + } + c.TLSConfig.SetDirectory(dir) + c.BasicAuth.SetDirectory(dir) + c.Authorization.SetDirectory(dir) + c.OAuth2.SetDirectory(dir) + c.BearerTokenFile = JoinDir(dir, c.BearerTokenFile) +} + +// Validate validates the HTTPClientConfig to check only one of BearerToken, +// BasicAuth and BearerTokenFile is configured. +func (c *HTTPClientConfig) Validate() error { + // Backwards compatibility with the bearer_token field. + if len(c.BearerToken) > 0 && len(c.BearerTokenFile) > 0 { + return fmt.Errorf("at most one of bearer_token & bearer_token_file must be configured") + } + if (c.BasicAuth != nil || c.OAuth2 != nil) && (len(c.BearerToken) > 0 || len(c.BearerTokenFile) > 0) { + return fmt.Errorf("at most one of basic_auth, oauth2, bearer_token & bearer_token_file must be configured") + } + if c.BasicAuth != nil && (string(c.BasicAuth.Password) != "" && c.BasicAuth.PasswordFile != "") { + return fmt.Errorf("at most one of basic_auth password & password_file must be configured") + } + if c.Authorization != nil { + if len(c.BearerToken) > 0 || len(c.BearerTokenFile) > 0 { + return fmt.Errorf("authorization is not compatible with bearer_token & bearer_token_file") + } + if string(c.Authorization.Credentials) != "" && c.Authorization.CredentialsFile != "" { + return fmt.Errorf("at most one of authorization credentials & credentials_file must be configured") + } + c.Authorization.Type = strings.TrimSpace(c.Authorization.Type) + if len(c.Authorization.Type) == 0 { + 
c.Authorization.Type = "Bearer"
+		}
+		if strings.ToLower(c.Authorization.Type) == "basic" {
+			return fmt.Errorf(`authorization type cannot be set to "basic", use "basic_auth" instead`)
+		}
+		if c.BasicAuth != nil || c.OAuth2 != nil {
+			return fmt.Errorf("at most one of basic_auth, oauth2 & authorization must be configured")
+		}
+	} else {
+		if len(c.BearerToken) > 0 {
+			c.Authorization = &Authorization{Credentials: c.BearerToken}
+			c.Authorization.Type = "Bearer"
+			c.BearerToken = ""
+		}
+		if len(c.BearerTokenFile) > 0 {
+			c.Authorization = &Authorization{CredentialsFile: c.BearerTokenFile}
+			c.Authorization.Type = "Bearer"
+			c.BearerTokenFile = ""
+		}
+	}
+	if c.OAuth2 != nil {
+		if c.BasicAuth != nil {
+			return fmt.Errorf("at most one of basic_auth, oauth2 & authorization must be configured")
+		}
+		if len(c.OAuth2.ClientID) == 0 {
+			return fmt.Errorf("oauth2 client_id must be configured")
+		}
+		if len(c.OAuth2.ClientSecret) == 0 && len(c.OAuth2.ClientSecretFile) == 0 {
+			return fmt.Errorf("either oauth2 client_secret or client_secret_file must be configured")
+		}
+		if len(c.OAuth2.TokenURL) == 0 {
+			return fmt.Errorf("oauth2 token_url must be configured")
+		}
+		if len(c.OAuth2.ClientSecret) > 0 && len(c.OAuth2.ClientSecretFile) > 0 {
+			return fmt.Errorf("at most one of oauth2 client_secret & client_secret_file must be configured")
+		}
+	}
+	return nil
+}
+
+// UnmarshalYAML implements the yaml.Unmarshaler interface.
+func (c *HTTPClientConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
+	type plain HTTPClientConfig
+	*c = DefaultHTTPClientConfig
+	if err := unmarshal((*plain)(c)); err != nil {
+		return err
+	}
+	return c.Validate()
+}
+
+// UnmarshalJSON implements the json.Unmarshaler interface for HTTPClientConfig.
+func (c *HTTPClientConfig) UnmarshalJSON(data []byte) error { + type plain HTTPClientConfig + *c = DefaultHTTPClientConfig + if err := json.Unmarshal(data, (*plain)(c)); err != nil { + return err + } + return c.Validate() +} + +// UnmarshalYAML implements the yaml.Unmarshaler interface. +func (a *BasicAuth) UnmarshalYAML(unmarshal func(interface{}) error) error { + type plain BasicAuth + return unmarshal((*plain)(a)) +} + +// DialContextFunc defines the signature of the DialContext() function implemented +// by net.Dialer. +type DialContextFunc func(context.Context, string, string) (net.Conn, error) + +type httpClientOptions struct { + dialContextFunc DialContextFunc + keepAlivesEnabled bool + http2Enabled bool +} + +// HTTPClientOption defines an option that can be applied to the HTTP client. +type HTTPClientOption func(options *httpClientOptions) + +// WithDialContextFunc allows you to override func gets used for the actual dialing. The default is `net.Dialer.DialContext`. +func WithDialContextFunc(fn DialContextFunc) HTTPClientOption { + return func(opts *httpClientOptions) { + opts.dialContextFunc = fn + } +} + +// WithKeepAlivesDisabled allows to disable HTTP keepalive. +func WithKeepAlivesDisabled() HTTPClientOption { + return func(opts *httpClientOptions) { + opts.keepAlivesEnabled = false + } +} + +// WithHTTP2Disabled allows to disable HTTP2. +func WithHTTP2Disabled() HTTPClientOption { + return func(opts *httpClientOptions) { + opts.http2Enabled = false + } +} + +// NewClient returns a http.Client using the specified http.RoundTripper. +func newClient(rt http.RoundTripper) *http.Client { + return &http.Client{Transport: rt} +} + +// NewClientFromConfig returns a new HTTP client configured for the +// given config.HTTPClientConfig and config.HTTPClientOption. +// The name is used as go-conntrack metric label. 
+func NewClientFromConfig(cfg HTTPClientConfig, name string, optFuncs ...HTTPClientOption) (*http.Client, error) { + rt, err := NewRoundTripperFromConfig(cfg, name, optFuncs...) + if err != nil { + return nil, err + } + client := newClient(rt) + if !cfg.FollowRedirects { + client.CheckRedirect = func(*http.Request, []*http.Request) error { + return http.ErrUseLastResponse + } + } + return client, nil +} + +// NewRoundTripperFromConfig returns a new HTTP RoundTripper configured for the +// given config.HTTPClientConfig and config.HTTPClientOption. +// The name is used as go-conntrack metric label. +func NewRoundTripperFromConfig(cfg HTTPClientConfig, name string, optFuncs ...HTTPClientOption) (http.RoundTripper, error) { + opts := defaultHTTPClientOptions + for _, f := range optFuncs { + f(&opts) + } + + var dialContext func(ctx context.Context, network, addr string) (net.Conn, error) + + if opts.dialContextFunc != nil { + dialContext = conntrack.NewDialContextFunc( + conntrack.DialWithDialContextFunc((func(context.Context, string, string) (net.Conn, error))(opts.dialContextFunc)), + conntrack.DialWithTracing(), + conntrack.DialWithName(name)) + } else { + dialContext = conntrack.NewDialContextFunc( + conntrack.DialWithTracing(), + conntrack.DialWithName(name)) + } + + newRT := func(tlsConfig *tls.Config) (http.RoundTripper, error) { + // The only timeout we care about is the configured scrape timeout. + // It is applied on request. So we leave out any timings here. + var rt http.RoundTripper = &http.Transport{ + Proxy: http.ProxyURL(cfg.ProxyURL.URL), + MaxIdleConns: 20000, + MaxIdleConnsPerHost: 1000, // see https://github.com/golang/go/issues/13801 + DisableKeepAlives: !opts.keepAlivesEnabled, + TLSClientConfig: tlsConfig, + DisableCompression: true, + // 5 minutes is typically above the maximum sane scrape interval. So we can + // use keepalive for all configurations. 
+ IdleConnTimeout: 5 * time.Minute, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + DialContext: dialContext, + } + if opts.http2Enabled { + // HTTP/2 support is golang has many problematic cornercases where + // dead connections would be kept and used in connection pools. + // https://github.com/golang/go/issues/32388 + // https://github.com/golang/go/issues/39337 + // https://github.com/golang/go/issues/39750 + // TODO: Re-Enable HTTP/2 once upstream issue is fixed. + // TODO: use ForceAttemptHTTP2 when we move to Go 1.13+. + err := http2.ConfigureTransport(rt.(*http.Transport)) + if err != nil { + return nil, err + } + } + + // If a authorization_credentials is provided, create a round tripper that will set the + // Authorization header correctly on each request. + if cfg.Authorization != nil && len(cfg.Authorization.Credentials) > 0 { + rt = NewAuthorizationCredentialsRoundTripper(cfg.Authorization.Type, cfg.Authorization.Credentials, rt) + } else if cfg.Authorization != nil && len(cfg.Authorization.CredentialsFile) > 0 { + rt = NewAuthorizationCredentialsFileRoundTripper(cfg.Authorization.Type, cfg.Authorization.CredentialsFile, rt) + } + // Backwards compatibility, be nice with importers who would not have + // called Validate(). + if len(cfg.BearerToken) > 0 { + rt = NewAuthorizationCredentialsRoundTripper("Bearer", cfg.BearerToken, rt) + } else if len(cfg.BearerTokenFile) > 0 { + rt = NewAuthorizationCredentialsFileRoundTripper("Bearer", cfg.BearerTokenFile, rt) + } + + if cfg.BasicAuth != nil { + rt = NewBasicAuthRoundTripper(cfg.BasicAuth.Username, cfg.BasicAuth.Password, cfg.BasicAuth.PasswordFile, rt) + } + + if cfg.OAuth2 != nil { + rt = NewOAuth2RoundTripper(cfg.OAuth2, rt) + } + // Return a new configured RoundTripper. 
+ return rt, nil + } + + tlsConfig, err := NewTLSConfig(&cfg.TLSConfig) + if err != nil { + return nil, err + } + + if len(cfg.TLSConfig.CAFile) == 0 { + // No need for a RoundTripper that reloads the CA file automatically. + return newRT(tlsConfig) + } + + return NewTLSRoundTripper(tlsConfig, cfg.TLSConfig.CAFile, newRT) +} + +type authorizationCredentialsRoundTripper struct { + authType string + authCredentials Secret + rt http.RoundTripper +} + +// NewAuthorizationCredentialsRoundTripper adds the provided credentials to a +// request unless the authorization header has already been set. +func NewAuthorizationCredentialsRoundTripper(authType string, authCredentials Secret, rt http.RoundTripper) http.RoundTripper { + return &authorizationCredentialsRoundTripper{authType, authCredentials, rt} +} + +func (rt *authorizationCredentialsRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + if len(req.Header.Get("Authorization")) == 0 { + req = cloneRequest(req) + req.Header.Set("Authorization", fmt.Sprintf("%s %s", rt.authType, string(rt.authCredentials))) + } + return rt.rt.RoundTrip(req) +} + +func (rt *authorizationCredentialsRoundTripper) CloseIdleConnections() { + if ci, ok := rt.rt.(closeIdler); ok { + ci.CloseIdleConnections() + } +} + +type authorizationCredentialsFileRoundTripper struct { + authType string + authCredentialsFile string + rt http.RoundTripper +} + +// NewAuthorizationCredentialsFileRoundTripper adds the authorization +// credentials read from the provided file to a request unless the authorization +// header has already been set. This file is read for every request. 
+func NewAuthorizationCredentialsFileRoundTripper(authType, authCredentialsFile string, rt http.RoundTripper) http.RoundTripper { + return &authorizationCredentialsFileRoundTripper{authType, authCredentialsFile, rt} +} + +func (rt *authorizationCredentialsFileRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + if len(req.Header.Get("Authorization")) == 0 { + b, err := ioutil.ReadFile(rt.authCredentialsFile) + if err != nil { + return nil, fmt.Errorf("unable to read authorization credentials file %s: %s", rt.authCredentialsFile, err) + } + authCredentials := strings.TrimSpace(string(b)) + + req = cloneRequest(req) + req.Header.Set("Authorization", fmt.Sprintf("%s %s", rt.authType, authCredentials)) + } + + return rt.rt.RoundTrip(req) +} + +func (rt *authorizationCredentialsFileRoundTripper) CloseIdleConnections() { + if ci, ok := rt.rt.(closeIdler); ok { + ci.CloseIdleConnections() + } +} + +type basicAuthRoundTripper struct { + username string + password Secret + passwordFile string + rt http.RoundTripper +} + +// NewBasicAuthRoundTripper will apply a BASIC auth authorization header to a request unless it has +// already been set. 
+func NewBasicAuthRoundTripper(username string, password Secret, passwordFile string, rt http.RoundTripper) http.RoundTripper { + return &basicAuthRoundTripper{username, password, passwordFile, rt} +} + +func (rt *basicAuthRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + if len(req.Header.Get("Authorization")) != 0 { + return rt.rt.RoundTrip(req) + } + req = cloneRequest(req) + if rt.passwordFile != "" { + bs, err := ioutil.ReadFile(rt.passwordFile) + if err != nil { + return nil, fmt.Errorf("unable to read basic auth password file %s: %s", rt.passwordFile, err) + } + req.SetBasicAuth(rt.username, strings.TrimSpace(string(bs))) + } else { + req.SetBasicAuth(rt.username, strings.TrimSpace(string(rt.password))) + } + return rt.rt.RoundTrip(req) +} + +func (rt *basicAuthRoundTripper) CloseIdleConnections() { + if ci, ok := rt.rt.(closeIdler); ok { + ci.CloseIdleConnections() + } +} + +type oauth2RoundTripper struct { + config *OAuth2 + rt http.RoundTripper + next http.RoundTripper + secret string + mtx sync.RWMutex +} + +func NewOAuth2RoundTripper(config *OAuth2, next http.RoundTripper) http.RoundTripper { + return &oauth2RoundTripper{ + config: config, + next: next, + } +} + +func (rt *oauth2RoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + var ( + secret string + changed bool + ) + + if rt.config.ClientSecretFile != "" { + data, err := ioutil.ReadFile(rt.config.ClientSecretFile) + if err != nil { + return nil, fmt.Errorf("unable to read oauth2 client secret file %s: %s", rt.config.ClientSecretFile, err) + } + secret = strings.TrimSpace(string(data)) + rt.mtx.RLock() + changed = secret != rt.secret + rt.mtx.RUnlock() + } + + if changed || rt.rt == nil { + if rt.config.ClientSecret != "" { + secret = string(rt.config.ClientSecret) + } + + config := &clientcredentials.Config{ + ClientID: rt.config.ClientID, + ClientSecret: secret, + Scopes: rt.config.Scopes, + TokenURL: rt.config.TokenURL, + EndpointParams: 
mapToValues(rt.config.EndpointParams), + } + + tokenSource := config.TokenSource(context.Background()) + + rt.mtx.Lock() + rt.secret = secret + rt.rt = &oauth2.Transport{ + Base: rt.next, + Source: tokenSource, + } + rt.mtx.Unlock() + } + + rt.mtx.RLock() + currentRT := rt.rt + rt.mtx.RUnlock() + return currentRT.RoundTrip(req) +} + +func (rt *oauth2RoundTripper) CloseIdleConnections() { + // OAuth2 RT does not support CloseIdleConnections() but the next RT might. + if ci, ok := rt.next.(closeIdler); ok { + ci.CloseIdleConnections() + } +} + +func mapToValues(m map[string]string) url.Values { + v := url.Values{} + for name, value := range m { + v.Set(name, value) + } + + return v +} + +// cloneRequest returns a clone of the provided *http.Request. +// The clone is a shallow copy of the struct and its Header map. +func cloneRequest(r *http.Request) *http.Request { + // Shallow copy of the struct. + r2 := new(http.Request) + *r2 = *r + // Deep copy of the Header. + r2.Header = make(http.Header) + for k, s := range r.Header { + r2.Header[k] = s + } + return r2 +} + +// NewTLSConfig creates a new tls.Config from the given TLSConfig. +func NewTLSConfig(cfg *TLSConfig) (*tls.Config, error) { + tlsConfig := &tls.Config{InsecureSkipVerify: cfg.InsecureSkipVerify} + + // If a CA cert is provided then let's read it in so we can validate the + // scrape target's certificate properly. + if len(cfg.CAFile) > 0 { + b, err := readCAFile(cfg.CAFile) + if err != nil { + return nil, err + } + if !updateRootCA(tlsConfig, b) { + return nil, fmt.Errorf("unable to use specified CA cert %s", cfg.CAFile) + } + } + + if len(cfg.ServerName) > 0 { + tlsConfig.ServerName = cfg.ServerName + } + // If a client cert & key is provided then configure TLS config accordingly. 
+ if len(cfg.CertFile) > 0 && len(cfg.KeyFile) == 0 { + return nil, fmt.Errorf("client cert file %q specified without client key file", cfg.CertFile) + } else if len(cfg.KeyFile) > 0 && len(cfg.CertFile) == 0 { + return nil, fmt.Errorf("client key file %q specified without client cert file", cfg.KeyFile) + } else if len(cfg.CertFile) > 0 && len(cfg.KeyFile) > 0 { + // Verify that client cert and key are valid. + if _, err := cfg.getClientCertificate(nil); err != nil { + return nil, err + } + tlsConfig.GetClientCertificate = cfg.getClientCertificate + } + + return tlsConfig, nil +} + +// TLSConfig configures the options for TLS connections. +type TLSConfig struct { + // The CA cert to use for the targets. + CAFile string `yaml:"ca_file,omitempty" json:"ca_file,omitempty"` + // The client cert file for the targets. + CertFile string `yaml:"cert_file,omitempty" json:"cert_file,omitempty"` + // The client key file for the targets. + KeyFile string `yaml:"key_file,omitempty" json:"key_file,omitempty"` + // Used to verify the hostname for the targets. + ServerName string `yaml:"server_name,omitempty" json:"server_name,omitempty"` + // Disable target certificate validation. + InsecureSkipVerify bool `yaml:"insecure_skip_verify" json:"insecure_skip_verify"` +} + +// SetDirectory joins any relative file paths with dir. +func (c *TLSConfig) SetDirectory(dir string) { + if c == nil { + return + } + c.CAFile = JoinDir(dir, c.CAFile) + c.CertFile = JoinDir(dir, c.CertFile) + c.KeyFile = JoinDir(dir, c.KeyFile) +} + +// UnmarshalYAML implements the yaml.Unmarshaler interface. +func (c *TLSConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { + type plain TLSConfig + return unmarshal((*plain)(c)) +} + +// getClientCertificate reads the pair of client cert and key from disk and returns a tls.Certificate. 
+func (c *TLSConfig) getClientCertificate(*tls.CertificateRequestInfo) (*tls.Certificate, error) { + cert, err := tls.LoadX509KeyPair(c.CertFile, c.KeyFile) + if err != nil { + return nil, fmt.Errorf("unable to use specified client cert (%s) & key (%s): %s", c.CertFile, c.KeyFile, err) + } + return &cert, nil +} + +// readCAFile reads the CA cert file from disk. +func readCAFile(f string) ([]byte, error) { + data, err := ioutil.ReadFile(f) + if err != nil { + return nil, fmt.Errorf("unable to load specified CA cert %s: %s", f, err) + } + return data, nil +} + +// updateRootCA parses the given byte slice as a series of PEM encoded certificates and updates tls.Config.RootCAs. +func updateRootCA(cfg *tls.Config, b []byte) bool { + caCertPool := x509.NewCertPool() + if !caCertPool.AppendCertsFromPEM(b) { + return false + } + cfg.RootCAs = caCertPool + return true +} + +// tlsRoundTripper is a RoundTripper that updates automatically its TLS +// configuration whenever the content of the CA file changes. +type tlsRoundTripper struct { + caFile string + // newRT returns a new RoundTripper. + newRT func(*tls.Config) (http.RoundTripper, error) + + mtx sync.RWMutex + rt http.RoundTripper + hashCAFile []byte + tlsConfig *tls.Config +} + +func NewTLSRoundTripper( + cfg *tls.Config, + caFile string, + newRT func(*tls.Config) (http.RoundTripper, error), +) (http.RoundTripper, error) { + t := &tlsRoundTripper{ + caFile: caFile, + newRT: newRT, + tlsConfig: cfg, + } + + rt, err := t.newRT(t.tlsConfig) + if err != nil { + return nil, err + } + t.rt = rt + + _, t.hashCAFile, err = t.getCAWithHash() + if err != nil { + return nil, err + } + + return t, nil +} + +func (t *tlsRoundTripper) getCAWithHash() ([]byte, []byte, error) { + b, err := readCAFile(t.caFile) + if err != nil { + return nil, nil, err + } + h := sha256.Sum256(b) + return b, h[:], nil + +} + +// RoundTrip implements the http.RoundTrip interface. 
+func (t *tlsRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + b, h, err := t.getCAWithHash() + if err != nil { + return nil, err + } + + t.mtx.RLock() + equal := bytes.Equal(h[:], t.hashCAFile) + rt := t.rt + t.mtx.RUnlock() + if equal { + // The CA cert hasn't changed, use the existing RoundTripper. + return rt.RoundTrip(req) + } + + // Create a new RoundTripper. + tlsConfig := t.tlsConfig.Clone() + if !updateRootCA(tlsConfig, b) { + return nil, fmt.Errorf("unable to use specified CA cert %s", t.caFile) + } + rt, err = t.newRT(tlsConfig) + if err != nil { + return nil, err + } + t.CloseIdleConnections() + + t.mtx.Lock() + t.rt = rt + t.hashCAFile = h[:] + t.mtx.Unlock() + + return rt.RoundTrip(req) +} + +func (t *tlsRoundTripper) CloseIdleConnections() { + t.mtx.RLock() + defer t.mtx.RUnlock() + if ci, ok := t.rt.(closeIdler); ok { + ci.CloseIdleConnections() + } +} + +func (c HTTPClientConfig) String() string { + b, err := yaml.Marshal(c) + if err != nil { + return fmt.Sprintf("<error creating http client config string: %s>", err) + } + return string(b) +} diff --git a/vendor/golang.org/x/net/internal/timeseries/timeseries.go b/vendor/golang.org/x/net/internal/timeseries/timeseries.go new file mode 100644 index 0000000000..dc5225b6d4 --- /dev/null +++ b/vendor/golang.org/x/net/internal/timeseries/timeseries.go @@ -0,0 +1,525 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package timeseries implements a time series structure for stats collection. 
+package timeseries // import "golang.org/x/net/internal/timeseries" + +import ( + "fmt" + "log" + "time" +) + +const ( + timeSeriesNumBuckets = 64 + minuteHourSeriesNumBuckets = 60 +) + +var timeSeriesResolutions = []time.Duration{ + 1 * time.Second, + 10 * time.Second, + 1 * time.Minute, + 10 * time.Minute, + 1 * time.Hour, + 6 * time.Hour, + 24 * time.Hour, // 1 day + 7 * 24 * time.Hour, // 1 week + 4 * 7 * 24 * time.Hour, // 4 weeks + 16 * 7 * 24 * time.Hour, // 16 weeks +} + +var minuteHourSeriesResolutions = []time.Duration{ + 1 * time.Second, + 1 * time.Minute, +} + +// An Observable is a kind of data that can be aggregated in a time series. +type Observable interface { + Multiply(ratio float64) // Multiplies the data in self by a given ratio + Add(other Observable) // Adds the data from a different observation to self + Clear() // Clears the observation so it can be reused. + CopyFrom(other Observable) // Copies the contents of a given observation to self +} + +// Float attaches the methods of Observable to a float64. +type Float float64 + +// NewFloat returns a Float. +func NewFloat() Observable { + f := Float(0) + return &f +} + +// String returns the float as a string. +func (f *Float) String() string { return fmt.Sprintf("%g", f.Value()) } + +// Value returns the float's value. +func (f *Float) Value() float64 { return float64(*f) } + +func (f *Float) Multiply(ratio float64) { *f *= Float(ratio) } + +func (f *Float) Add(other Observable) { + o := other.(*Float) + *f += *o +} + +func (f *Float) Clear() { *f = 0 } + +func (f *Float) CopyFrom(other Observable) { + o := other.(*Float) + *f = *o +} + +// A Clock tells the current time. +type Clock interface { + Time() time.Time +} + +type defaultClock int + +var defaultClockInstance defaultClock + +func (defaultClock) Time() time.Time { return time.Now() } + +// Information kept per level. Each level consists of a circular list of +// observations. 
The start of the level may be derived from end and the +// len(buckets) * sizeInMillis. +type tsLevel struct { + oldest int // index to oldest bucketed Observable + newest int // index to newest bucketed Observable + end time.Time // end timestamp for this level + size time.Duration // duration of the bucketed Observable + buckets []Observable // collections of observations + provider func() Observable // used for creating new Observable +} + +func (l *tsLevel) Clear() { + l.oldest = 0 + l.newest = len(l.buckets) - 1 + l.end = time.Time{} + for i := range l.buckets { + if l.buckets[i] != nil { + l.buckets[i].Clear() + l.buckets[i] = nil + } + } +} + +func (l *tsLevel) InitLevel(size time.Duration, numBuckets int, f func() Observable) { + l.size = size + l.provider = f + l.buckets = make([]Observable, numBuckets) +} + +// Keeps a sequence of levels. Each level is responsible for storing data at +// a given resolution. For example, the first level stores data at a one +// minute resolution while the second level stores data at a one hour +// resolution. + +// Each level is represented by a sequence of buckets. Each bucket spans an +// interval equal to the resolution of the level. New observations are added +// to the last bucket. +type timeSeries struct { + provider func() Observable // make more Observable + numBuckets int // number of buckets in each level + levels []*tsLevel // levels of bucketed Observable + lastAdd time.Time // time of last Observable tracked + total Observable // convenient aggregation of all Observable + clock Clock // Clock for getting current time + pending Observable // observations not yet bucketed + pendingTime time.Time // what time are we keeping in pending + dirty bool // if there are pending observations +} + +// init initializes a level according to the supplied criteria. 
+func (ts *timeSeries) init(resolutions []time.Duration, f func() Observable, numBuckets int, clock Clock) { + ts.provider = f + ts.numBuckets = numBuckets + ts.clock = clock + ts.levels = make([]*tsLevel, len(resolutions)) + + for i := range resolutions { + if i > 0 && resolutions[i-1] >= resolutions[i] { + log.Print("timeseries: resolutions must be monotonically increasing") + break + } + newLevel := new(tsLevel) + newLevel.InitLevel(resolutions[i], ts.numBuckets, ts.provider) + ts.levels[i] = newLevel + } + + ts.Clear() +} + +// Clear removes all observations from the time series. +func (ts *timeSeries) Clear() { + ts.lastAdd = time.Time{} + ts.total = ts.resetObservation(ts.total) + ts.pending = ts.resetObservation(ts.pending) + ts.pendingTime = time.Time{} + ts.dirty = false + + for i := range ts.levels { + ts.levels[i].Clear() + } +} + +// Add records an observation at the current time. +func (ts *timeSeries) Add(observation Observable) { + ts.AddWithTime(observation, ts.clock.Time()) +} + +// AddWithTime records an observation at the specified time. +func (ts *timeSeries) AddWithTime(observation Observable, t time.Time) { + + smallBucketDuration := ts.levels[0].size + + if t.After(ts.lastAdd) { + ts.lastAdd = t + } + + if t.After(ts.pendingTime) { + ts.advance(t) + ts.mergePendingUpdates() + ts.pendingTime = ts.levels[0].end + ts.pending.CopyFrom(observation) + ts.dirty = true + } else if t.After(ts.pendingTime.Add(-1 * smallBucketDuration)) { + // The observation is close enough to go into the pending bucket. + // This compensates for clock skewing and small scheduling delays + // by letting the update stay in the fast path. + ts.pending.Add(observation) + ts.dirty = true + } else { + ts.mergeValue(observation, t) + } +} + +// mergeValue inserts the observation at the specified time in the past into all levels. 
+func (ts *timeSeries) mergeValue(observation Observable, t time.Time) { + for _, level := range ts.levels { + index := (ts.numBuckets - 1) - int(level.end.Sub(t)/level.size) + if 0 <= index && index < ts.numBuckets { + bucketNumber := (level.oldest + index) % ts.numBuckets + if level.buckets[bucketNumber] == nil { + level.buckets[bucketNumber] = level.provider() + } + level.buckets[bucketNumber].Add(observation) + } + } + ts.total.Add(observation) +} + +// mergePendingUpdates applies the pending updates into all levels. +func (ts *timeSeries) mergePendingUpdates() { + if ts.dirty { + ts.mergeValue(ts.pending, ts.pendingTime) + ts.pending = ts.resetObservation(ts.pending) + ts.dirty = false + } +} + +// advance cycles the buckets at each level until the latest bucket in +// each level can hold the time specified. +func (ts *timeSeries) advance(t time.Time) { + if !t.After(ts.levels[0].end) { + return + } + for i := 0; i < len(ts.levels); i++ { + level := ts.levels[i] + if !level.end.Before(t) { + break + } + + // If the time is sufficiently far, just clear the level and advance + // directly. + if !t.Before(level.end.Add(level.size * time.Duration(ts.numBuckets))) { + for _, b := range level.buckets { + ts.resetObservation(b) + } + level.end = time.Unix(0, (t.UnixNano()/level.size.Nanoseconds())*level.size.Nanoseconds()) + } + + for t.After(level.end) { + level.end = level.end.Add(level.size) + level.newest = level.oldest + level.oldest = (level.oldest + 1) % ts.numBuckets + ts.resetObservation(level.buckets[level.newest]) + } + + t = level.end + } +} + +// Latest returns the sum of the num latest buckets from the level. 
+func (ts *timeSeries) Latest(level, num int) Observable { + now := ts.clock.Time() + if ts.levels[0].end.Before(now) { + ts.advance(now) + } + + ts.mergePendingUpdates() + + result := ts.provider() + l := ts.levels[level] + index := l.newest + + for i := 0; i < num; i++ { + if l.buckets[index] != nil { + result.Add(l.buckets[index]) + } + if index == 0 { + index = ts.numBuckets + } + index-- + } + + return result +} + +// LatestBuckets returns a copy of the num latest buckets from level. +func (ts *timeSeries) LatestBuckets(level, num int) []Observable { + if level < 0 || level > len(ts.levels) { + log.Print("timeseries: bad level argument: ", level) + return nil + } + if num < 0 || num >= ts.numBuckets { + log.Print("timeseries: bad num argument: ", num) + return nil + } + + results := make([]Observable, num) + now := ts.clock.Time() + if ts.levels[0].end.Before(now) { + ts.advance(now) + } + + ts.mergePendingUpdates() + + l := ts.levels[level] + index := l.newest + + for i := 0; i < num; i++ { + result := ts.provider() + results[i] = result + if l.buckets[index] != nil { + result.CopyFrom(l.buckets[index]) + } + + if index == 0 { + index = ts.numBuckets + } + index -= 1 + } + return results +} + +// ScaleBy updates observations by scaling by factor. +func (ts *timeSeries) ScaleBy(factor float64) { + for _, l := range ts.levels { + for i := 0; i < ts.numBuckets; i++ { + l.buckets[i].Multiply(factor) + } + } + + ts.total.Multiply(factor) + ts.pending.Multiply(factor) +} + +// Range returns the sum of observations added over the specified time range. +// If start or finish times don't fall on bucket boundaries of the same +// level, then return values are approximate answers. +func (ts *timeSeries) Range(start, finish time.Time) Observable { + return ts.ComputeRange(start, finish, 1)[0] +} + +// Recent returns the sum of observations from the last delta. 
+func (ts *timeSeries) Recent(delta time.Duration) Observable { + now := ts.clock.Time() + return ts.Range(now.Add(-delta), now) +} + +// Total returns the total of all observations. +func (ts *timeSeries) Total() Observable { + ts.mergePendingUpdates() + return ts.total +} + +// ComputeRange computes a specified number of values into a slice using +// the observations recorded over the specified time period. The return +// values are approximate if the start or finish times don't fall on the +// bucket boundaries at the same level or if the number of buckets spanning +// the range is not an integral multiple of num. +func (ts *timeSeries) ComputeRange(start, finish time.Time, num int) []Observable { + if start.After(finish) { + log.Printf("timeseries: start > finish, %v>%v", start, finish) + return nil + } + + if num < 0 { + log.Printf("timeseries: num < 0, %v", num) + return nil + } + + results := make([]Observable, num) + + for _, l := range ts.levels { + if !start.Before(l.end.Add(-l.size * time.Duration(ts.numBuckets))) { + ts.extract(l, start, finish, num, results) + return results + } + } + + // Failed to find a level that covers the desired range. So just + // extract from the last level, even if it doesn't cover the entire + // desired range. + ts.extract(ts.levels[len(ts.levels)-1], start, finish, num, results) + + return results +} + +// RecentList returns the specified number of values in slice over the most +// recent time period of the specified range. +func (ts *timeSeries) RecentList(delta time.Duration, num int) []Observable { + if delta < 0 { + return nil + } + now := ts.clock.Time() + return ts.ComputeRange(now.Add(-delta), now, num) +} + +// extract returns a slice of specified number of observations from a given +// level over a given range. 
+func (ts *timeSeries) extract(l *tsLevel, start, finish time.Time, num int, results []Observable) { + ts.mergePendingUpdates() + + srcInterval := l.size + dstInterval := finish.Sub(start) / time.Duration(num) + dstStart := start + srcStart := l.end.Add(-srcInterval * time.Duration(ts.numBuckets)) + + srcIndex := 0 + + // Where should scanning start? + if dstStart.After(srcStart) { + advance := int(dstStart.Sub(srcStart) / srcInterval) + srcIndex += advance + srcStart = srcStart.Add(time.Duration(advance) * srcInterval) + } + + // The i'th value is computed as show below. + // interval = (finish/start)/num + // i'th value = sum of observation in range + // [ start + i * interval, + // start + (i + 1) * interval ) + for i := 0; i < num; i++ { + results[i] = ts.resetObservation(results[i]) + dstEnd := dstStart.Add(dstInterval) + for srcIndex < ts.numBuckets && srcStart.Before(dstEnd) { + srcEnd := srcStart.Add(srcInterval) + if srcEnd.After(ts.lastAdd) { + srcEnd = ts.lastAdd + } + + if !srcEnd.Before(dstStart) { + srcValue := l.buckets[(srcIndex+l.oldest)%ts.numBuckets] + if !srcStart.Before(dstStart) && !srcEnd.After(dstEnd) { + // dst completely contains src. + if srcValue != nil { + results[i].Add(srcValue) + } + } else { + // dst partially overlaps src. + overlapStart := maxTime(srcStart, dstStart) + overlapEnd := minTime(srcEnd, dstEnd) + base := srcEnd.Sub(srcStart) + fraction := overlapEnd.Sub(overlapStart).Seconds() / base.Seconds() + + used := ts.provider() + if srcValue != nil { + used.CopyFrom(srcValue) + } + used.Multiply(fraction) + results[i].Add(used) + } + + if srcEnd.After(dstEnd) { + break + } + } + srcIndex++ + srcStart = srcStart.Add(srcInterval) + } + dstStart = dstStart.Add(dstInterval) + } +} + +// resetObservation clears the content so the struct may be reused. 
+func (ts *timeSeries) resetObservation(observation Observable) Observable { + if observation == nil { + observation = ts.provider() + } else { + observation.Clear() + } + return observation +} + +// TimeSeries tracks data at granularities from 1 second to 16 weeks. +type TimeSeries struct { + timeSeries +} + +// NewTimeSeries creates a new TimeSeries using the function provided for creating new Observable. +func NewTimeSeries(f func() Observable) *TimeSeries { + return NewTimeSeriesWithClock(f, defaultClockInstance) +} + +// NewTimeSeriesWithClock creates a new TimeSeries using the function provided for creating new Observable and the clock for +// assigning timestamps. +func NewTimeSeriesWithClock(f func() Observable, clock Clock) *TimeSeries { + ts := new(TimeSeries) + ts.timeSeries.init(timeSeriesResolutions, f, timeSeriesNumBuckets, clock) + return ts +} + +// MinuteHourSeries tracks data at granularities of 1 minute and 1 hour. +type MinuteHourSeries struct { + timeSeries +} + +// NewMinuteHourSeries creates a new MinuteHourSeries using the function provided for creating new Observable. +func NewMinuteHourSeries(f func() Observable) *MinuteHourSeries { + return NewMinuteHourSeriesWithClock(f, defaultClockInstance) +} + +// NewMinuteHourSeriesWithClock creates a new MinuteHourSeries using the function provided for creating new Observable and the clock for +// assigning timestamps. 
+func NewMinuteHourSeriesWithClock(f func() Observable, clock Clock) *MinuteHourSeries { + ts := new(MinuteHourSeries) + ts.timeSeries.init(minuteHourSeriesResolutions, f, + minuteHourSeriesNumBuckets, clock) + return ts +} + +func (ts *MinuteHourSeries) Minute() Observable { + return ts.timeSeries.Latest(0, 60) +} + +func (ts *MinuteHourSeries) Hour() Observable { + return ts.timeSeries.Latest(1, 60) +} + +func minTime(a, b time.Time) time.Time { + if a.Before(b) { + return a + } + return b +} + +func maxTime(a, b time.Time) time.Time { + if a.After(b) { + return a + } + return b +} diff --git a/vendor/golang.org/x/net/trace/events.go b/vendor/golang.org/x/net/trace/events.go new file mode 100644 index 0000000000..c646a6952e --- /dev/null +++ b/vendor/golang.org/x/net/trace/events.go @@ -0,0 +1,532 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import ( + "bytes" + "fmt" + "html/template" + "io" + "log" + "net/http" + "runtime" + "sort" + "strconv" + "strings" + "sync" + "sync/atomic" + "text/tabwriter" + "time" +) + +const maxEventsPerLog = 100 + +type bucket struct { + MaxErrAge time.Duration + String string +} + +var buckets = []bucket{ + {0, "total"}, + {10 * time.Second, "errs<10s"}, + {1 * time.Minute, "errs<1m"}, + {10 * time.Minute, "errs<10m"}, + {1 * time.Hour, "errs<1h"}, + {10 * time.Hour, "errs<10h"}, + {24000 * time.Hour, "errors"}, +} + +// RenderEvents renders the HTML page typically served at /debug/events. +// It does not do any auth checking. The request may be nil. +// +// Most users will use the Events handler. +func RenderEvents(w http.ResponseWriter, req *http.Request, sensitive bool) { + now := time.Now() + data := &struct { + Families []string // family names + Buckets []bucket + Counts [][]int // eventLog count per family/bucket + + // Set when a bucket has been selected. 
+ Family string + Bucket int + EventLogs eventLogs + Expanded bool + }{ + Buckets: buckets, + } + + data.Families = make([]string, 0, len(families)) + famMu.RLock() + for name := range families { + data.Families = append(data.Families, name) + } + famMu.RUnlock() + sort.Strings(data.Families) + + // Count the number of eventLogs in each family for each error age. + data.Counts = make([][]int, len(data.Families)) + for i, name := range data.Families { + // TODO(sameer): move this loop under the family lock. + f := getEventFamily(name) + data.Counts[i] = make([]int, len(data.Buckets)) + for j, b := range data.Buckets { + data.Counts[i][j] = f.Count(now, b.MaxErrAge) + } + } + + if req != nil { + var ok bool + data.Family, data.Bucket, ok = parseEventsArgs(req) + if !ok { + // No-op + } else { + data.EventLogs = getEventFamily(data.Family).Copy(now, buckets[data.Bucket].MaxErrAge) + } + if data.EventLogs != nil { + defer data.EventLogs.Free() + sort.Sort(data.EventLogs) + } + if exp, err := strconv.ParseBool(req.FormValue("exp")); err == nil { + data.Expanded = exp + } + } + + famMu.RLock() + defer famMu.RUnlock() + if err := eventsTmpl().Execute(w, data); err != nil { + log.Printf("net/trace: Failed executing template: %v", err) + } +} + +func parseEventsArgs(req *http.Request) (fam string, b int, ok bool) { + fam, bStr := req.FormValue("fam"), req.FormValue("b") + if fam == "" || bStr == "" { + return "", 0, false + } + b, err := strconv.Atoi(bStr) + if err != nil || b < 0 || b >= len(buckets) { + return "", 0, false + } + return fam, b, true +} + +// An EventLog provides a log of events associated with a specific object. +type EventLog interface { + // Printf formats its arguments with fmt.Sprintf and adds the + // result to the event log. + Printf(format string, a ...interface{}) + + // Errorf is like Printf, but it marks this event as an error. + Errorf(format string, a ...interface{}) + + // Finish declares that this event log is complete. 
+ // The event log should not be used after calling this method. + Finish() +} + +// NewEventLog returns a new EventLog with the specified family name +// and title. +func NewEventLog(family, title string) EventLog { + el := newEventLog() + el.ref() + el.Family, el.Title = family, title + el.Start = time.Now() + el.events = make([]logEntry, 0, maxEventsPerLog) + el.stack = make([]uintptr, 32) + n := runtime.Callers(2, el.stack) + el.stack = el.stack[:n] + + getEventFamily(family).add(el) + return el +} + +func (el *eventLog) Finish() { + getEventFamily(el.Family).remove(el) + el.unref() // matches ref in New +} + +var ( + famMu sync.RWMutex + families = make(map[string]*eventFamily) // family name => family +) + +func getEventFamily(fam string) *eventFamily { + famMu.Lock() + defer famMu.Unlock() + f := families[fam] + if f == nil { + f = &eventFamily{} + families[fam] = f + } + return f +} + +type eventFamily struct { + mu sync.RWMutex + eventLogs eventLogs +} + +func (f *eventFamily) add(el *eventLog) { + f.mu.Lock() + f.eventLogs = append(f.eventLogs, el) + f.mu.Unlock() +} + +func (f *eventFamily) remove(el *eventLog) { + f.mu.Lock() + defer f.mu.Unlock() + for i, el0 := range f.eventLogs { + if el == el0 { + copy(f.eventLogs[i:], f.eventLogs[i+1:]) + f.eventLogs = f.eventLogs[:len(f.eventLogs)-1] + return + } + } +} + +func (f *eventFamily) Count(now time.Time, maxErrAge time.Duration) (n int) { + f.mu.RLock() + defer f.mu.RUnlock() + for _, el := range f.eventLogs { + if el.hasRecentError(now, maxErrAge) { + n++ + } + } + return +} + +func (f *eventFamily) Copy(now time.Time, maxErrAge time.Duration) (els eventLogs) { + f.mu.RLock() + defer f.mu.RUnlock() + els = make(eventLogs, 0, len(f.eventLogs)) + for _, el := range f.eventLogs { + if el.hasRecentError(now, maxErrAge) { + el.ref() + els = append(els, el) + } + } + return +} + +type eventLogs []*eventLog + +// Free calls unref on each element of the list. 
func (els eventLogs) Free() {
	for _, el := range els {
		el.unref()
	}
}

// eventLogs may be sorted in reverse chronological order.
func (els eventLogs) Len() int           { return len(els) }
func (els eventLogs) Less(i, j int) bool { return els[i].Start.After(els[j].Start) }
func (els eventLogs) Swap(i, j int)      { els[i], els[j] = els[j], els[i] }

// A logEntry is a timestamped log entry in an event log.
type logEntry struct {
	When    time.Time
	Elapsed time.Duration // since previous event in log
	NewDay  bool          // whether this event is on a different day to the previous event
	What    string
	IsErr   bool // set by Errorf; drives the family's LastErrorTime bookkeeping
}

// WhenString returns a string representation of the elapsed time of the event.
// It will include the date if midnight was crossed.
func (e logEntry) WhenString() string {
	if e.NewDay {
		return e.When.Format("2006/01/02 15:04:05.000000")
	}
	return e.When.Format("15:04:05.000000")
}

// An eventLog represents an active event log.
type eventLog struct {
	// Family is the top-level grouping of event logs to which this belongs.
	Family string

	// Title is the title of this event log.
	Title string

	// Timing information. Start is set by NewEventLog.
	Start time.Time

	// Call stack where this event log was created.
	stack []uintptr

	// Append-only sequence of events.
	//
	// TODO(sameer): change this to a ring buffer to avoid the array copy
	// when we hit maxEventsPerLog.
	mu            sync.RWMutex // guards events, LastErrorTime and discarded
	events        []logEntry
	LastErrorTime time.Time
	discarded     int // count of evicted events; zero until the first eviction in printf

	refs int32 // how many buckets this is in
}

// reset clears the eventLog so it can be reused via the freeEventLogs
// freelist (see freeEventLog/newEventLog).
func (el *eventLog) reset() {
	// Clear all but the mutex. Mutexes may not be copied, even when unlocked.
	el.Family = ""
	el.Title = ""
	el.Start = time.Time{}
	el.stack = nil
	el.events = nil
	el.LastErrorTime = time.Time{}
	el.discarded = 0
	el.refs = 0
}

// hasRecentError reports whether the log recorded an error within maxErrAge
// of now. A maxErrAge of zero matches every log.
func (el *eventLog) hasRecentError(now time.Time, maxErrAge time.Duration) bool {
	if maxErrAge == 0 {
		return true
	}
	el.mu.RLock()
	defer el.mu.RUnlock()
	return now.Sub(el.LastErrorTime) < maxErrAge
}

// delta returns the elapsed time since the last event or the log start,
// and whether it spans midnight.
// L >= el.mu
func (el *eventLog) delta(t time.Time) (time.Duration, bool) {
	if len(el.events) == 0 {
		return t.Sub(el.Start), false
	}
	prev := el.events[len(el.events)-1].When
	return t.Sub(prev), prev.Day() != t.Day()
}

// Printf implements EventLog.
func (el *eventLog) Printf(format string, a ...interface{}) {
	el.printf(false, format, a...)
}

// Errorf implements EventLog; the entry is marked as an error.
func (el *eventLog) Errorf(format string, a ...interface{}) {
	el.printf(true, format, a...)
}

// printf appends a formatted entry to the log. Once the log holds
// maxEventsPerLog entries, the oldest real entry is evicted and the first
// slot is rewritten as an aggregated "(N events discarded)" meta-entry.
func (el *eventLog) printf(isErr bool, format string, a ...interface{}) {
	// Format outside the lock; only the list manipulation is guarded.
	e := logEntry{When: time.Now(), IsErr: isErr, What: fmt.Sprintf(format, a...)}
	el.mu.Lock()
	e.Elapsed, e.NewDay = el.delta(e.When)
	if len(el.events) < maxEventsPerLog {
		el.events = append(el.events, e)
	} else {
		// Discard the oldest event.
		if el.discarded == 0 {
			// el.discarded starts at two to count for the event it
			// is replacing, plus the next one that we are about to
			// drop.
			el.discarded = 2
		} else {
			el.discarded++
		}
		// TODO(sameer): if this causes allocations on a critical path,
		// change eventLog.What to be a fmt.Stringer, as in trace.go.
		el.events[0].What = fmt.Sprintf("(%d events discarded)", el.discarded)
		// The timestamp of the discarded meta-event should be
		// the time of the last event it is representing.
		el.events[0].When = el.events[1].When
		copy(el.events[1:], el.events[2:])
		el.events[maxEventsPerLog-1] = e
	}
	if e.IsErr {
		el.LastErrorTime = e.When
	}
	el.mu.Unlock()
}

// ref adds a reference to the log.
func (el *eventLog) ref() {
	atomic.AddInt32(&el.refs, 1)
}

// unref drops a reference; the last unref returns the log to the freelist.
func (el *eventLog) unref() {
	if atomic.AddInt32(&el.refs, -1) == 0 {
		freeEventLog(el)
	}
}

// When returns the formatted start time of the log (used by the events template).
func (el *eventLog) When() string {
	return el.Start.Format("2006/01/02 15:04:05.000000")
}

// ElapsedTime returns the seconds since the log started, as a fixed-point
// string (used by the events template).
func (el *eventLog) ElapsedTime() string {
	elapsed := time.Since(el.Start)
	return fmt.Sprintf("%.6f", elapsed.Seconds())
}

// Stack renders the call stack captured when the log was created,
// tab-aligned for display.
func (el *eventLog) Stack() string {
	buf := new(bytes.Buffer)
	tw := tabwriter.NewWriter(buf, 1, 8, 1, '\t', 0)
	printStackRecord(tw, el.stack)
	tw.Flush()
	return buf.String()
}

// printStackRecord prints the function + source line information
// for a single stack trace.
// Adapted from runtime/pprof/pprof.go.
func printStackRecord(w io.Writer, stk []uintptr) {
	for _, pc := range stk {
		f := runtime.FuncForPC(pc)
		if f == nil {
			continue
		}
		file, line := f.FileLine(pc)
		name := f.Name()
		// Hide runtime.goexit and any runtime functions at the beginning.
		// (As written this skips every runtime.* frame, not only leading ones.)
		if strings.HasPrefix(name, "runtime.") {
			continue
		}
		fmt.Fprintf(w, "# %s\t%s:%d\n", name, file, line)
	}
}

// Events returns the current entries. The returned slice aliases the log's
// internal storage; later printf calls may rewrite entries in place after
// the lock is released.
func (el *eventLog) Events() []logEntry {
	el.mu.RLock()
	defer el.mu.RUnlock()
	return el.events
}

// freeEventLogs is a freelist of *eventLog.
var freeEventLogs = make(chan *eventLog, 1000)

// newEventLog returns an event log ready to use, reusing a freed one if available.
func newEventLog() *eventLog {
	select {
	case el := <-freeEventLogs:
		return el
	default:
		return new(eventLog)
	}
}

// freeEventLog adds el to freeEventLogs if there's room.
// This is non-blocking.
+func freeEventLog(el *eventLog) { + el.reset() + select { + case freeEventLogs <- el: + default: + } +} + +var eventsTmplCache *template.Template +var eventsTmplOnce sync.Once + +func eventsTmpl() *template.Template { + eventsTmplOnce.Do(func() { + eventsTmplCache = template.Must(template.New("events").Funcs(template.FuncMap{ + "elapsed": elapsed, + "trimSpace": strings.TrimSpace, + }).Parse(eventsHTML)) + }) + return eventsTmplCache +} + +const eventsHTML = ` + + + events + + + + +

/debug/events

+ + + {{range $i, $fam := .Families}} + + + + {{range $j, $bucket := $.Buckets}} + {{$n := index $.Counts $i $j}} + + {{end}} + + {{end}} +
{{$fam}} + {{if $n}}{{end}} + [{{$n}} {{$bucket.String}}] + {{if $n}}{{end}} +
+ +{{if $.EventLogs}} +
+

Family: {{$.Family}}

+ +{{if $.Expanded}}{{end}} +[Summary]{{if $.Expanded}}{{end}} + +{{if not $.Expanded}}{{end}} +[Expanded]{{if not $.Expanded}}{{end}} + + + + {{range $el := $.EventLogs}} + + + + + {{if $.Expanded}} + + + + + + {{range $el.Events}} + + + + + + {{end}} + {{end}} + {{end}} +
WhenElapsed
{{$el.When}}{{$el.ElapsedTime}}{{$el.Title}} +
{{$el.Stack|trimSpace}}
{{.WhenString}}{{elapsed .Elapsed}}.{{if .IsErr}}E{{else}}.{{end}}. {{.What}}
+{{end}} + + +` diff --git a/vendor/golang.org/x/net/trace/histogram.go b/vendor/golang.org/x/net/trace/histogram.go new file mode 100644 index 0000000000..9bf4286c79 --- /dev/null +++ b/vendor/golang.org/x/net/trace/histogram.go @@ -0,0 +1,365 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +// This file implements histogramming for RPC statistics collection. + +import ( + "bytes" + "fmt" + "html/template" + "log" + "math" + "sync" + + "golang.org/x/net/internal/timeseries" +) + +const ( + bucketCount = 38 +) + +// histogram keeps counts of values in buckets that are spaced +// out in powers of 2: 0-1, 2-3, 4-7... +// histogram implements timeseries.Observable +type histogram struct { + sum int64 // running total of measurements + sumOfSquares float64 // square of running total + buckets []int64 // bucketed values for histogram + value int // holds a single value as an optimization + valueCount int64 // number of values recorded for single value +} + +// AddMeasurement records a value measurement observation to the histogram. 
+func (h *histogram) addMeasurement(value int64) { + // TODO: assert invariant + h.sum += value + h.sumOfSquares += float64(value) * float64(value) + + bucketIndex := getBucket(value) + + if h.valueCount == 0 || (h.valueCount > 0 && h.value == bucketIndex) { + h.value = bucketIndex + h.valueCount++ + } else { + h.allocateBuckets() + h.buckets[bucketIndex]++ + } +} + +func (h *histogram) allocateBuckets() { + if h.buckets == nil { + h.buckets = make([]int64, bucketCount) + h.buckets[h.value] = h.valueCount + h.value = 0 + h.valueCount = -1 + } +} + +func log2(i int64) int { + n := 0 + for ; i >= 0x100; i >>= 8 { + n += 8 + } + for ; i > 0; i >>= 1 { + n += 1 + } + return n +} + +func getBucket(i int64) (index int) { + index = log2(i) - 1 + if index < 0 { + index = 0 + } + if index >= bucketCount { + index = bucketCount - 1 + } + return +} + +// Total returns the number of recorded observations. +func (h *histogram) total() (total int64) { + if h.valueCount >= 0 { + total = h.valueCount + } + for _, val := range h.buckets { + total += int64(val) + } + return +} + +// Average returns the average value of recorded observations. +func (h *histogram) average() float64 { + t := h.total() + if t == 0 { + return 0 + } + return float64(h.sum) / float64(t) +} + +// Variance returns the variance of recorded observations. +func (h *histogram) variance() float64 { + t := float64(h.total()) + if t == 0 { + return 0 + } + s := float64(h.sum) / t + return h.sumOfSquares/t - s*s +} + +// StandardDeviation returns the standard deviation of recorded observations. +func (h *histogram) standardDeviation() float64 { + return math.Sqrt(h.variance()) +} + +// PercentileBoundary estimates the value that the given fraction of recorded +// observations are less than. 
func (h *histogram) percentileBoundary(percentile float64) int64 {
	total := h.total()

	// Corner cases (make sure result is strictly less than Total())
	if total == 0 {
		return 0
	} else if total == 1 {
		return int64(h.average())
	}

	// percentOfTotal is the rank of the observation we are estimating;
	// percentile is a fraction in [0, 1] (median passes 0.5) — TODO confirm
	// the expected range against other callers.
	percentOfTotal := round(float64(total) * percentile)
	var runningTotal int64

	for i := range h.buckets {
		value := h.buckets[i]
		runningTotal += value
		if runningTotal == percentOfTotal {
			// We hit an exact bucket boundary. If the next bucket has data, it is a
			// good estimate of the value. If the bucket is empty, we interpolate the
			// midpoint between the next bucket's boundary and the next non-zero
			// bucket. If the remaining buckets are all empty, then we use the
			// boundary for the next bucket as the estimate.
			j := uint8(i + 1)
			min := bucketBoundary(j)
			if runningTotal < total {
				// runningTotal < total guarantees a non-zero bucket
				// ahead, so this scan terminates before running off
				// the end of h.buckets.
				for h.buckets[j] == 0 {
					j++
				}
			}
			max := bucketBoundary(j)
			return min + round(float64(max-min)/2)
		} else if runningTotal > percentOfTotal {
			// The value is in this bucket. Interpolate the value.
			delta := runningTotal - percentOfTotal
			percentBucket := float64(value-delta) / float64(value)
			bucketMin := bucketBoundary(uint8(i))
			nextBucketMin := bucketBoundary(uint8(i + 1))
			bucketSize := nextBucketMin - bucketMin
			return bucketMin + round(percentBucket*float64(bucketSize))
		}
	}
	// Fallback: rank never reached (can happen via rounding); use the last
	// bucket's lower boundary.
	return bucketBoundary(bucketCount - 1)
}

// median returns the estimated median of the observed values.
func (h *histogram) median() int64 {
	return h.percentileBoundary(0.5)
}

// Add adds other to h.
+func (h *histogram) Add(other timeseries.Observable) { + o := other.(*histogram) + if o.valueCount == 0 { + // Other histogram is empty + } else if h.valueCount >= 0 && o.valueCount > 0 && h.value == o.value { + // Both have a single bucketed value, aggregate them + h.valueCount += o.valueCount + } else { + // Two different values necessitate buckets in this histogram + h.allocateBuckets() + if o.valueCount >= 0 { + h.buckets[o.value] += o.valueCount + } else { + for i := range h.buckets { + h.buckets[i] += o.buckets[i] + } + } + } + h.sumOfSquares += o.sumOfSquares + h.sum += o.sum +} + +// Clear resets the histogram to an empty state, removing all observed values. +func (h *histogram) Clear() { + h.buckets = nil + h.value = 0 + h.valueCount = 0 + h.sum = 0 + h.sumOfSquares = 0 +} + +// CopyFrom copies from other, which must be a *histogram, into h. +func (h *histogram) CopyFrom(other timeseries.Observable) { + o := other.(*histogram) + if o.valueCount == -1 { + h.allocateBuckets() + copy(h.buckets, o.buckets) + } + h.sum = o.sum + h.sumOfSquares = o.sumOfSquares + h.value = o.value + h.valueCount = o.valueCount +} + +// Multiply scales the histogram by the specified ratio. +func (h *histogram) Multiply(ratio float64) { + if h.valueCount == -1 { + for i := range h.buckets { + h.buckets[i] = int64(float64(h.buckets[i]) * ratio) + } + } else { + h.valueCount = int64(float64(h.valueCount) * ratio) + } + h.sum = int64(float64(h.sum) * ratio) + h.sumOfSquares = h.sumOfSquares * ratio +} + +// New creates a new histogram. +func (h *histogram) New() timeseries.Observable { + r := new(histogram) + r.Clear() + return r +} + +func (h *histogram) String() string { + return fmt.Sprintf("%d, %f, %d, %d, %v", + h.sum, h.sumOfSquares, h.value, h.valueCount, h.buckets) +} + +// round returns the closest int64 to the argument +func round(in float64) int64 { + return int64(math.Floor(in + 0.5)) +} + +// bucketBoundary returns the first value in the bucket. 
+func bucketBoundary(bucket uint8) int64 { + if bucket == 0 { + return 0 + } + return 1 << bucket +} + +// bucketData holds data about a specific bucket for use in distTmpl. +type bucketData struct { + Lower, Upper int64 + N int64 + Pct, CumulativePct float64 + GraphWidth int +} + +// data holds data about a Distribution for use in distTmpl. +type data struct { + Buckets []*bucketData + Count, Median int64 + Mean, StandardDeviation float64 +} + +// maxHTMLBarWidth is the maximum width of the HTML bar for visualizing buckets. +const maxHTMLBarWidth = 350.0 + +// newData returns data representing h for use in distTmpl. +func (h *histogram) newData() *data { + // Force the allocation of buckets to simplify the rendering implementation + h.allocateBuckets() + // We scale the bars on the right so that the largest bar is + // maxHTMLBarWidth pixels in width. + maxBucket := int64(0) + for _, n := range h.buckets { + if n > maxBucket { + maxBucket = n + } + } + total := h.total() + barsizeMult := maxHTMLBarWidth / float64(maxBucket) + var pctMult float64 + if total == 0 { + pctMult = 1.0 + } else { + pctMult = 100.0 / float64(total) + } + + buckets := make([]*bucketData, len(h.buckets)) + runningTotal := int64(0) + for i, n := range h.buckets { + if n == 0 { + continue + } + runningTotal += n + var upperBound int64 + if i < bucketCount-1 { + upperBound = bucketBoundary(uint8(i + 1)) + } else { + upperBound = math.MaxInt64 + } + buckets[i] = &bucketData{ + Lower: bucketBoundary(uint8(i)), + Upper: upperBound, + N: n, + Pct: float64(n) * pctMult, + CumulativePct: float64(runningTotal) * pctMult, + GraphWidth: int(float64(n) * barsizeMult), + } + } + return &data{ + Buckets: buckets, + Count: total, + Median: h.median(), + Mean: h.average(), + StandardDeviation: h.standardDeviation(), + } +} + +func (h *histogram) html() template.HTML { + buf := new(bytes.Buffer) + if err := distTmpl().Execute(buf, h.newData()); err != nil { + buf.Reset() + log.Printf("net/trace: couldn't 
execute template: %v", err) + } + return template.HTML(buf.String()) +} + +var distTmplCache *template.Template +var distTmplOnce sync.Once + +func distTmpl() *template.Template { + distTmplOnce.Do(func() { + // Input: data + distTmplCache = template.Must(template.New("distTmpl").Parse(` + + + + + + + +
Count: {{.Count}}Mean: {{printf "%.0f" .Mean}}StdDev: {{printf "%.0f" .StandardDeviation}}Median: {{.Median}}
+
+ +{{range $b := .Buckets}} +{{if $b}} + + + + + + + + + +{{end}} +{{end}} +
[{{.Lower}},{{.Upper}}){{.N}}{{printf "%#.3f" .Pct}}%{{printf "%#.3f" .CumulativePct}}%
+`)) + }) + return distTmplCache +} diff --git a/vendor/golang.org/x/net/trace/trace.go b/vendor/golang.org/x/net/trace/trace.go new file mode 100644 index 0000000000..3ebf6f2daa --- /dev/null +++ b/vendor/golang.org/x/net/trace/trace.go @@ -0,0 +1,1130 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package trace implements tracing of requests and long-lived objects. +It exports HTTP interfaces on /debug/requests and /debug/events. + +A trace.Trace provides tracing for short-lived objects, usually requests. +A request handler might be implemented like this: + + func fooHandler(w http.ResponseWriter, req *http.Request) { + tr := trace.New("mypkg.Foo", req.URL.Path) + defer tr.Finish() + ... + tr.LazyPrintf("some event %q happened", str) + ... + if err := somethingImportant(); err != nil { + tr.LazyPrintf("somethingImportant failed: %v", err) + tr.SetError() + } + } + +The /debug/requests HTTP endpoint organizes the traces by family, +errors, and duration. It also provides histogram of request duration +for each family. + +A trace.EventLog provides tracing for long-lived objects, such as RPC +connections. + + // A Fetcher fetches URL paths for a single domain. + type Fetcher struct { + domain string + events trace.EventLog + } + + func NewFetcher(domain string) *Fetcher { + return &Fetcher{ + domain, + trace.NewEventLog("mypkg.Fetcher", domain), + } + } + + func (f *Fetcher) Fetch(path string) (string, error) { + resp, err := http.Get("http://" + f.domain + "/" + path) + if err != nil { + f.events.Errorf("Get(%q) = %v", path, err) + return "", err + } + f.events.Printf("Get(%q) = %s", path, resp.Status) + ... + } + + func (f *Fetcher) Close() error { + f.events.Finish() + return nil + } + +The /debug/events HTTP endpoint organizes the event logs by family and +by time since the last error. 
The expanded view displays recent log +entries and the log's call stack. +*/ +package trace // import "golang.org/x/net/trace" + +import ( + "bytes" + "context" + "fmt" + "html/template" + "io" + "log" + "net" + "net/http" + "net/url" + "runtime" + "sort" + "strconv" + "sync" + "sync/atomic" + "time" + + "golang.org/x/net/internal/timeseries" +) + +// DebugUseAfterFinish controls whether to debug uses of Trace values after finishing. +// FOR DEBUGGING ONLY. This will slow down the program. +var DebugUseAfterFinish = false + +// HTTP ServeMux paths. +const ( + debugRequestsPath = "/debug/requests" + debugEventsPath = "/debug/events" +) + +// AuthRequest determines whether a specific request is permitted to load the +// /debug/requests or /debug/events pages. +// +// It returns two bools; the first indicates whether the page may be viewed at all, +// and the second indicates whether sensitive events will be shown. +// +// AuthRequest may be replaced by a program to customize its authorization requirements. +// +// The default AuthRequest function returns (true, true) if and only if the request +// comes from localhost/127.0.0.1/[::1]. +var AuthRequest = func(req *http.Request) (any, sensitive bool) { + // RemoteAddr is commonly in the form "IP" or "IP:port". + // If it is in the form "IP:port", split off the port. + host, _, err := net.SplitHostPort(req.RemoteAddr) + if err != nil { + host = req.RemoteAddr + } + switch host { + case "localhost", "127.0.0.1", "::1": + return true, true + default: + return false, false + } +} + +func init() { + _, pat := http.DefaultServeMux.Handler(&http.Request{URL: &url.URL{Path: debugRequestsPath}}) + if pat == debugRequestsPath { + panic("/debug/requests is already registered. You may have two independent copies of " + + "golang.org/x/net/trace in your binary, trying to maintain separate state. This may " + + "involve a vendored copy of golang.org/x/net/trace.") + } + + // TODO(jbd): Serve Traces from /debug/traces in the future? 
+ // There is no requirement for a request to be present to have traces. + http.HandleFunc(debugRequestsPath, Traces) + http.HandleFunc(debugEventsPath, Events) +} + +// NewContext returns a copy of the parent context +// and associates it with a Trace. +func NewContext(ctx context.Context, tr Trace) context.Context { + return context.WithValue(ctx, contextKey, tr) +} + +// FromContext returns the Trace bound to the context, if any. +func FromContext(ctx context.Context) (tr Trace, ok bool) { + tr, ok = ctx.Value(contextKey).(Trace) + return +} + +// Traces responds with traces from the program. +// The package initialization registers it in http.DefaultServeMux +// at /debug/requests. +// +// It performs authorization by running AuthRequest. +func Traces(w http.ResponseWriter, req *http.Request) { + any, sensitive := AuthRequest(req) + if !any { + http.Error(w, "not allowed", http.StatusUnauthorized) + return + } + w.Header().Set("Content-Type", "text/html; charset=utf-8") + Render(w, req, sensitive) +} + +// Events responds with a page of events collected by EventLogs. +// The package initialization registers it in http.DefaultServeMux +// at /debug/events. +// +// It performs authorization by running AuthRequest. +func Events(w http.ResponseWriter, req *http.Request) { + any, sensitive := AuthRequest(req) + if !any { + http.Error(w, "not allowed", http.StatusUnauthorized) + return + } + w.Header().Set("Content-Type", "text/html; charset=utf-8") + RenderEvents(w, req, sensitive) +} + +// Render renders the HTML page typically served at /debug/requests. +// It does not do any auth checking. The request may be nil. +// +// Most users will use the Traces handler. +func Render(w io.Writer, req *http.Request, sensitive bool) { + data := &struct { + Families []string + ActiveTraceCount map[string]int + CompletedTraces map[string]*family + + // Set when a bucket has been selected. 
+ Traces traceList + Family string + Bucket int + Expanded bool + Traced bool + Active bool + ShowSensitive bool // whether to show sensitive events + + Histogram template.HTML + HistogramWindow string // e.g. "last minute", "last hour", "all time" + + // If non-zero, the set of traces is a partial set, + // and this is the total number. + Total int + }{ + CompletedTraces: completedTraces, + } + + data.ShowSensitive = sensitive + if req != nil { + // Allow show_sensitive=0 to force hiding of sensitive data for testing. + // This only goes one way; you can't use show_sensitive=1 to see things. + if req.FormValue("show_sensitive") == "0" { + data.ShowSensitive = false + } + + if exp, err := strconv.ParseBool(req.FormValue("exp")); err == nil { + data.Expanded = exp + } + if exp, err := strconv.ParseBool(req.FormValue("rtraced")); err == nil { + data.Traced = exp + } + } + + completedMu.RLock() + data.Families = make([]string, 0, len(completedTraces)) + for fam := range completedTraces { + data.Families = append(data.Families, fam) + } + completedMu.RUnlock() + sort.Strings(data.Families) + + // We are careful here to minimize the time spent locking activeMu, + // since that lock is required every time an RPC starts and finishes. 
+ data.ActiveTraceCount = make(map[string]int, len(data.Families)) + activeMu.RLock() + for fam, s := range activeTraces { + data.ActiveTraceCount[fam] = s.Len() + } + activeMu.RUnlock() + + var ok bool + data.Family, data.Bucket, ok = parseArgs(req) + switch { + case !ok: + // No-op + case data.Bucket == -1: + data.Active = true + n := data.ActiveTraceCount[data.Family] + data.Traces = getActiveTraces(data.Family) + if len(data.Traces) < n { + data.Total = n + } + case data.Bucket < bucketsPerFamily: + if b := lookupBucket(data.Family, data.Bucket); b != nil { + data.Traces = b.Copy(data.Traced) + } + default: + if f := getFamily(data.Family, false); f != nil { + var obs timeseries.Observable + f.LatencyMu.RLock() + switch o := data.Bucket - bucketsPerFamily; o { + case 0: + obs = f.Latency.Minute() + data.HistogramWindow = "last minute" + case 1: + obs = f.Latency.Hour() + data.HistogramWindow = "last hour" + case 2: + obs = f.Latency.Total() + data.HistogramWindow = "all time" + } + f.LatencyMu.RUnlock() + if obs != nil { + data.Histogram = obs.(*histogram).html() + } + } + } + + if data.Traces != nil { + defer data.Traces.Free() + sort.Sort(data.Traces) + } + + completedMu.RLock() + defer completedMu.RUnlock() + if err := pageTmpl().ExecuteTemplate(w, "Page", data); err != nil { + log.Printf("net/trace: Failed executing template: %v", err) + } +} + +func parseArgs(req *http.Request) (fam string, b int, ok bool) { + if req == nil { + return "", 0, false + } + fam, bStr := req.FormValue("fam"), req.FormValue("b") + if fam == "" || bStr == "" { + return "", 0, false + } + b, err := strconv.Atoi(bStr) + if err != nil || b < -1 { + return "", 0, false + } + + return fam, b, true +} + +func lookupBucket(fam string, b int) *traceBucket { + f := getFamily(fam, false) + if f == nil || b < 0 || b >= len(f.Buckets) { + return nil + } + return f.Buckets[b] +} + +type contextKeyT string + +var contextKey = contextKeyT("golang.org/x/net/trace.Trace") + +// Trace represents 
an active request. +type Trace interface { + // LazyLog adds x to the event log. It will be evaluated each time the + // /debug/requests page is rendered. Any memory referenced by x will be + // pinned until the trace is finished and later discarded. + LazyLog(x fmt.Stringer, sensitive bool) + + // LazyPrintf evaluates its arguments with fmt.Sprintf each time the + // /debug/requests page is rendered. Any memory referenced by a will be + // pinned until the trace is finished and later discarded. + LazyPrintf(format string, a ...interface{}) + + // SetError declares that this trace resulted in an error. + SetError() + + // SetRecycler sets a recycler for the trace. + // f will be called for each event passed to LazyLog at a time when + // it is no longer required, whether while the trace is still active + // and the event is discarded, or when a completed trace is discarded. + SetRecycler(f func(interface{})) + + // SetTraceInfo sets the trace info for the trace. + // This is currently unused. + SetTraceInfo(traceID, spanID uint64) + + // SetMaxEvents sets the maximum number of events that will be stored + // in the trace. This has no effect if any events have already been + // added to the trace. + SetMaxEvents(m int) + + // Finish declares that this trace is complete. + // The trace should not be used after calling this method. + Finish() +} + +type lazySprintf struct { + format string + a []interface{} +} + +func (l *lazySprintf) String() string { + return fmt.Sprintf(l.format, l.a...) +} + +// New returns a new Trace with the specified family and title. 
func New(family, title string) Trace {
	tr := newTrace()
	tr.ref()
	tr.Family, tr.Title = family, title
	tr.Start = time.Now()
	tr.maxEvents = maxEventsPerTrace
	tr.events = tr.eventsBuf[:0] // start in the preallocated inline buffer

	// Register tr in the active set for its family, creating the set on
	// first use (double-checked under activeMu).
	activeMu.RLock()
	s := activeTraces[tr.Family]
	activeMu.RUnlock()
	if s == nil {
		activeMu.Lock()
		s = activeTraces[tr.Family] // check again
		if s == nil {
			s = new(traceSet)
			activeTraces[tr.Family] = s
		}
		activeMu.Unlock()
	}
	s.Add(tr)

	// Trigger allocation of the completed trace structure for this family.
	// This will cause the family to be present in the request page during
	// the first trace of this family. We don't care about the return value,
	// nor is there any need for this to run inline, so we execute it in its
	// own goroutine, but only if the family isn't allocated yet.
	completedMu.RLock()
	if _, ok := completedTraces[tr.Family]; !ok {
		go allocFamily(tr.Family)
	}
	completedMu.RUnlock()

	return tr
}

// Finish implements Trace. It records the elapsed time, moves the trace
// from the active set into every completed-trace bucket whose condition
// matches, adds the latency (in microseconds) to the family's time series,
// and releases the reference taken in New.
func (tr *trace) Finish() {
	elapsed := time.Now().Sub(tr.Start)
	tr.mu.Lock()
	tr.Elapsed = elapsed
	tr.mu.Unlock()

	if DebugUseAfterFinish {
		buf := make([]byte, 4<<10) // 4 KB should be enough
		n := runtime.Stack(buf, false)
		tr.finishStack = buf[:n]
	}

	activeMu.RLock()
	m := activeTraces[tr.Family]
	activeMu.RUnlock()
	m.Remove(tr)

	f := getFamily(tr.Family, true)
	tr.mu.RLock() // protects tr fields in Cond.match calls
	for _, b := range f.Buckets {
		if b.Cond.match(tr) {
			b.Add(tr)
		}
	}
	tr.mu.RUnlock()

	// Add a sample of elapsed time as microseconds to the family's timeseries
	h := new(histogram)
	h.addMeasurement(elapsed.Nanoseconds() / 1e3)
	f.LatencyMu.Lock()
	f.Latency.Add(h)
	f.LatencyMu.Unlock()

	tr.unref() // matches ref in New
}

const (
	bucketsPerFamily    = 9  // eight latency thresholds plus one errors bucket (see newFamily)
	tracesPerBucket     = 10 // capacity of each traceBucket ring buffer
	maxActiveTraces     = 20 // Maximum number of active traces to show.
	maxEventsPerTrace   = 10
	numHistogramBuckets = 38 // NOTE(review): unused in this file's visible code; presumably mirrors histogram's bucketCount — confirm
)

var (
	// The active traces.
	activeMu     sync.RWMutex
	activeTraces = make(map[string]*traceSet) // family -> traces

	// Families of completed traces.
	completedMu     sync.RWMutex
	completedTraces = make(map[string]*family) // family -> traces
)

// A traceSet holds the currently active traces of one family.
type traceSet struct {
	mu sync.RWMutex
	m  map[*trace]bool

	// We could avoid the entire map scan in FirstN by having a slice of all the traces
	// ordered by start time, and an index into that from the trace struct, with a periodic
	// repack of the slice after enough traces finish; we could also use a skip list or similar.
	// However, that would shift some of the expense from /debug/requests time to RPC time,
	// which is probably the wrong trade-off.
}

// Len returns the number of active traces in the set.
func (ts *traceSet) Len() int {
	ts.mu.RLock()
	defer ts.mu.RUnlock()
	return len(ts.m)
}

// Add inserts tr, lazily allocating the underlying map.
func (ts *traceSet) Add(tr *trace) {
	ts.mu.Lock()
	if ts.m == nil {
		ts.m = make(map[*trace]bool)
	}
	ts.m[tr] = true
	ts.mu.Unlock()
}

// Remove deletes tr from the set.
func (ts *traceSet) Remove(tr *trace) {
	ts.mu.Lock()
	delete(ts.m, tr)
	ts.mu.Unlock()
}

// FirstN returns the first n traces ordered by time.
// The returned traces are ref'd; callers release them via traceList.Free.
func (ts *traceSet) FirstN(n int) traceList {
	ts.mu.RLock()
	defer ts.mu.RUnlock()

	if n > len(ts.m) {
		n = len(ts.m)
	}
	trl := make(traceList, 0, n)

	// Fast path for when no selectivity is needed.
	if n == len(ts.m) {
		for tr := range ts.m {
			tr.ref()
			trl = append(trl, tr)
		}
		sort.Sort(trl)
		return trl
	}

	// Pick the oldest n traces.
	// This is inefficient. See the comment in the traceSet struct.
	for tr := range ts.m {
		// Put the first n traces into trl in the order they occur.
		// When we have n, sort trl, and thereafter maintain its order.
		if len(trl) < n {
			tr.ref()
			trl = append(trl, tr)
			if len(trl) == n {
				// This is guaranteed to happen exactly once during this loop.
				sort.Sort(trl)
			}
			continue
		}
		if tr.Start.After(trl[n-1].Start) {
			continue
		}

		// Find where to insert this one.
		// NOTE(review): sort.Search expects a false-then-true predicate,
		// but trl is kept newest-first (traceList.Less uses Start.After),
		// making this predicate true-then-false. Callers re-sort the
		// result (Render does), so display order is unaffected — verify
		// the selection/eviction behavior against upstream.
		tr.ref()
		i := sort.Search(n, func(i int) bool { return trl[i].Start.After(tr.Start) })
		trl[n-1].unref()
		copy(trl[i+1:], trl[i:])
		trl[i] = tr
	}

	return trl
}

// getActiveTraces returns up to maxActiveTraces ref'd traces for fam,
// or nil if the family has never had an active trace.
func getActiveTraces(fam string) traceList {
	activeMu.RLock()
	s := activeTraces[fam]
	activeMu.RUnlock()
	if s == nil {
		return nil
	}
	return s.FirstN(maxActiveTraces)
}

// getFamily returns the completed-trace family for fam, allocating it when
// allocNew is true. It may return nil when allocNew is false.
func getFamily(fam string, allocNew bool) *family {
	completedMu.RLock()
	f := completedTraces[fam]
	completedMu.RUnlock()
	if f == nil && allocNew {
		f = allocFamily(fam)
	}
	return f
}

// allocFamily returns fam's family, creating and registering it under
// completedMu if it does not exist yet.
func allocFamily(fam string) *family {
	completedMu.Lock()
	defer completedMu.Unlock()
	f := completedTraces[fam]
	if f == nil {
		f = newFamily()
		completedTraces[fam] = f
	}
	return f
}

// family represents a set of trace buckets and associated latency information.
type family struct {
	// traces may occur in multiple buckets.
	Buckets [bucketsPerFamily]*traceBucket

	// latency time series
	LatencyMu sync.RWMutex
	Latency   *timeseries.MinuteHourSeries
}

// newFamily builds a family with eight minimum-latency buckets plus one
// errors bucket, and a fresh minute/hour latency series.
func newFamily() *family {
	return &family{
		Buckets: [bucketsPerFamily]*traceBucket{
			{Cond: minCond(0)},
			{Cond: minCond(50 * time.Millisecond)},
			{Cond: minCond(100 * time.Millisecond)},
			{Cond: minCond(200 * time.Millisecond)},
			{Cond: minCond(500 * time.Millisecond)},
			{Cond: minCond(1 * time.Second)},
			{Cond: minCond(10 * time.Second)},
			{Cond: minCond(100 * time.Second)},
			{Cond: errorCond{}},
		},
		Latency: timeseries.NewMinuteHourSeries(func() timeseries.Observable { return new(histogram) }),
	}
}

// traceBucket represents a size-capped bucket of historic traces,
// along with a condition for a trace to belong to the bucket.
type traceBucket struct {
	Cond cond

	// Ring buffer implementation of a fixed-size FIFO queue.
	mu     sync.RWMutex
	buf    [tracesPerBucket]*trace
	start  int // < tracesPerBucket
	length int // <= tracesPerBucket
}

// Add appends tr to the ring, unref'ing and evicting the oldest entry once
// the bucket is full; tr is ref'd while it remains in the bucket.
func (b *traceBucket) Add(tr *trace) {
	b.mu.Lock()
	defer b.mu.Unlock()

	i := b.start + b.length
	if i >= tracesPerBucket {
		i -= tracesPerBucket
	}
	if b.length == tracesPerBucket {
		// "Remove" an element from the bucket.
		b.buf[i].unref()
		b.start++
		if b.start == tracesPerBucket {
			b.start = 0
		}
	}
	b.buf[i] = tr
	if b.length < tracesPerBucket {
		b.length++
	}
	tr.ref()
}

// Copy returns a copy of the traces in the bucket.
// If tracedOnly is true, only the traces with trace information will be returned.
// The logs will be ref'd before returning; the caller should call
// the Free method when it is done with them.
// TODO(dsymonds): keep track of traced requests in separate buckets.
func (b *traceBucket) Copy(tracedOnly bool) traceList {
	b.mu.RLock()
	defer b.mu.RUnlock()

	trl := make(traceList, 0, b.length)
	for i, x := 0, b.start; i < b.length; i++ {
		tr := b.buf[x]
		if !tracedOnly || tr.spanID != 0 {
			tr.ref()
			trl = append(trl, tr)
		}
		x++
		// Wrapping at b.length rather than tracesPerBucket is equivalent
		// here: Add only advances b.start once the bucket is full, so
		// whenever b.start > 0 we have b.length == tracesPerBucket.
		if x == b.length {
			x = 0
		}
	}
	return trl
}

// Empty reports whether the bucket holds no traces.
func (b *traceBucket) Empty() bool {
	b.mu.RLock()
	defer b.mu.RUnlock()
	return b.length == 0
}

// cond represents a condition on a trace.
type cond interface {
	match(t *trace) bool
	String() string
}

// minCond matches traces whose elapsed time is at least its value.
type minCond time.Duration

func (m minCond) match(t *trace) bool { return t.Elapsed >= time.Duration(m) }
func (m minCond) String() string      { return fmt.Sprintf("≥%gs", time.Duration(m).Seconds()) }

// errorCond matches traces on which SetError was called.
type errorCond struct{}

func (e errorCond) match(t *trace) bool { return t.IsError }
func (e errorCond) String() string      { return "errors" }

type traceList []*trace

// Free calls unref on each element of the list.
func (trl traceList) Free() {
	for _, t := range trl {
		t.unref()
	}
}

// traceList may be sorted in reverse chronological order.
+func (trl traceList) Len() int { return len(trl) } +func (trl traceList) Less(i, j int) bool { return trl[i].Start.After(trl[j].Start) } +func (trl traceList) Swap(i, j int) { trl[i], trl[j] = trl[j], trl[i] } + +// An event is a timestamped log entry in a trace. +type event struct { + When time.Time + Elapsed time.Duration // since previous event in trace + NewDay bool // whether this event is on a different day to the previous event + Recyclable bool // whether this event was passed via LazyLog + Sensitive bool // whether this event contains sensitive information + What interface{} // string or fmt.Stringer +} + +// WhenString returns a string representation of the elapsed time of the event. +// It will include the date if midnight was crossed. +func (e event) WhenString() string { + if e.NewDay { + return e.When.Format("2006/01/02 15:04:05.000000") + } + return e.When.Format("15:04:05.000000") +} + +// discarded represents a number of discarded events. +// It is stored as *discarded to make it easier to update in-place. +type discarded int + +func (d *discarded) String() string { + return fmt.Sprintf("(%d events discarded)", int(*d)) +} + +// trace represents an active or complete request, +// either sent or received by this program. +type trace struct { + // Family is the top-level grouping of traces to which this belongs. + Family string + + // Title is the title of this trace. + Title string + + // Start time of the this trace. + Start time.Time + + mu sync.RWMutex + events []event // Append-only sequence of events (modulo discards). + maxEvents int + recycler func(interface{}) + IsError bool // Whether this trace resulted in an error. + Elapsed time.Duration // Elapsed time for this trace, zero while active. + traceID uint64 // Trace information if non-zero. 
+ spanID uint64 + + refs int32 // how many buckets this is in + disc discarded // scratch space to avoid allocation + + finishStack []byte // where finish was called, if DebugUseAfterFinish is set + + eventsBuf [4]event // preallocated buffer in case we only log a few events +} + +func (tr *trace) reset() { + // Clear all but the mutex. Mutexes may not be copied, even when unlocked. + tr.Family = "" + tr.Title = "" + tr.Start = time.Time{} + + tr.mu.Lock() + tr.Elapsed = 0 + tr.traceID = 0 + tr.spanID = 0 + tr.IsError = false + tr.maxEvents = 0 + tr.events = nil + tr.recycler = nil + tr.mu.Unlock() + + tr.refs = 0 + tr.disc = 0 + tr.finishStack = nil + for i := range tr.eventsBuf { + tr.eventsBuf[i] = event{} + } +} + +// delta returns the elapsed time since the last event or the trace start, +// and whether it spans midnight. +// L >= tr.mu +func (tr *trace) delta(t time.Time) (time.Duration, bool) { + if len(tr.events) == 0 { + return t.Sub(tr.Start), false + } + prev := tr.events[len(tr.events)-1].When + return t.Sub(prev), prev.Day() != t.Day() +} + +func (tr *trace) addEvent(x interface{}, recyclable, sensitive bool) { + if DebugUseAfterFinish && tr.finishStack != nil { + buf := make([]byte, 4<<10) // 4 KB should be enough + n := runtime.Stack(buf, false) + log.Printf("net/trace: trace used after finish:\nFinished at:\n%s\nUsed at:\n%s", tr.finishStack, buf[:n]) + } + + /* + NOTE TO DEBUGGERS + + If you are here because your program panicked in this code, + it is almost definitely the fault of code using this package, + and very unlikely to be the fault of this code. + + The most likely scenario is that some code elsewhere is using + a trace.Trace after its Finish method is called. + You can temporarily set the DebugUseAfterFinish var + to help discover where that is; do not leave that var set, + since it makes this package much less efficient. 
+ */ + + e := event{When: time.Now(), What: x, Recyclable: recyclable, Sensitive: sensitive} + tr.mu.Lock() + e.Elapsed, e.NewDay = tr.delta(e.When) + if len(tr.events) < tr.maxEvents { + tr.events = append(tr.events, e) + } else { + // Discard the middle events. + di := int((tr.maxEvents - 1) / 2) + if d, ok := tr.events[di].What.(*discarded); ok { + (*d)++ + } else { + // disc starts at two to count for the event it is replacing, + // plus the next one that we are about to drop. + tr.disc = 2 + if tr.recycler != nil && tr.events[di].Recyclable { + go tr.recycler(tr.events[di].What) + } + tr.events[di].What = &tr.disc + } + // The timestamp of the discarded meta-event should be + // the time of the last event it is representing. + tr.events[di].When = tr.events[di+1].When + + if tr.recycler != nil && tr.events[di+1].Recyclable { + go tr.recycler(tr.events[di+1].What) + } + copy(tr.events[di+1:], tr.events[di+2:]) + tr.events[tr.maxEvents-1] = e + } + tr.mu.Unlock() +} + +func (tr *trace) LazyLog(x fmt.Stringer, sensitive bool) { + tr.addEvent(x, true, sensitive) +} + +func (tr *trace) LazyPrintf(format string, a ...interface{}) { + tr.addEvent(&lazySprintf{format, a}, false, false) +} + +func (tr *trace) SetError() { + tr.mu.Lock() + tr.IsError = true + tr.mu.Unlock() +} + +func (tr *trace) SetRecycler(f func(interface{})) { + tr.mu.Lock() + tr.recycler = f + tr.mu.Unlock() +} + +func (tr *trace) SetTraceInfo(traceID, spanID uint64) { + tr.mu.Lock() + tr.traceID, tr.spanID = traceID, spanID + tr.mu.Unlock() +} + +func (tr *trace) SetMaxEvents(m int) { + tr.mu.Lock() + // Always keep at least three events: first, discarded count, last. + if len(tr.events) == 0 && m > 3 { + tr.maxEvents = m + } + tr.mu.Unlock() +} + +func (tr *trace) ref() { + atomic.AddInt32(&tr.refs, 1) +} + +func (tr *trace) unref() { + if atomic.AddInt32(&tr.refs, -1) == 0 { + tr.mu.RLock() + if tr.recycler != nil { + // freeTrace clears tr, so we hold tr.recycler and tr.events here. 
+ go func(f func(interface{}), es []event) { + for _, e := range es { + if e.Recyclable { + f(e.What) + } + } + }(tr.recycler, tr.events) + } + tr.mu.RUnlock() + + freeTrace(tr) + } +} + +func (tr *trace) When() string { + return tr.Start.Format("2006/01/02 15:04:05.000000") +} + +func (tr *trace) ElapsedTime() string { + tr.mu.RLock() + t := tr.Elapsed + tr.mu.RUnlock() + + if t == 0 { + // Active trace. + t = time.Since(tr.Start) + } + return fmt.Sprintf("%.6f", t.Seconds()) +} + +func (tr *trace) Events() []event { + tr.mu.RLock() + defer tr.mu.RUnlock() + return tr.events +} + +var traceFreeList = make(chan *trace, 1000) // TODO(dsymonds): Use sync.Pool? + +// newTrace returns a trace ready to use. +func newTrace() *trace { + select { + case tr := <-traceFreeList: + return tr + default: + return new(trace) + } +} + +// freeTrace adds tr to traceFreeList if there's room. +// This is non-blocking. +func freeTrace(tr *trace) { + if DebugUseAfterFinish { + return // never reuse + } + tr.reset() + select { + case traceFreeList <- tr: + default: + } +} + +func elapsed(d time.Duration) string { + b := []byte(fmt.Sprintf("%.6f", d.Seconds())) + + // For subsecond durations, blank all zeros before decimal point, + // and all zeros between the decimal point and the first non-zero digit. 
+ if d < time.Second { + dot := bytes.IndexByte(b, '.') + for i := 0; i < dot; i++ { + b[i] = ' ' + } + for i := dot + 1; i < len(b); i++ { + if b[i] == '0' { + b[i] = ' ' + } else { + break + } + } + } + + return string(b) +} + +var pageTmplCache *template.Template +var pageTmplOnce sync.Once + +func pageTmpl() *template.Template { + pageTmplOnce.Do(func() { + pageTmplCache = template.Must(template.New("Page").Funcs(template.FuncMap{ + "elapsed": elapsed, + "add": func(a, b int) int { return a + b }, + }).Parse(pageHTML)) + }) + return pageTmplCache +} + +const pageHTML = ` +{{template "Prolog" .}} +{{template "StatusTable" .}} +{{template "Epilog" .}} + +{{define "Prolog"}} + + + /debug/requests + + + + +

/debug/requests

+{{end}} {{/* end of Prolog */}} + +{{define "StatusTable"}} + + {{range $fam := .Families}} + + + + {{$n := index $.ActiveTraceCount $fam}} + + + {{$f := index $.CompletedTraces $fam}} + {{range $i, $b := $f.Buckets}} + {{$empty := $b.Empty}} + + {{end}} + + {{$nb := len $f.Buckets}} + + + + + + {{end}} +
{{$fam}} + {{if $n}}{{end}} + [{{$n}} active] + {{if $n}}{{end}} + + {{if not $empty}}{{end}} + [{{.Cond}}] + {{if not $empty}}{{end}} + + [minute] + + [hour] + + [total] +
+{{end}} {{/* end of StatusTable */}} + +{{define "Epilog"}} +{{if $.Traces}} +
+

Family: {{$.Family}}

+ +{{if or $.Expanded $.Traced}} + [Normal/Summary] +{{else}} + [Normal/Summary] +{{end}} + +{{if or (not $.Expanded) $.Traced}} + [Normal/Expanded] +{{else}} + [Normal/Expanded] +{{end}} + +{{if not $.Active}} + {{if or $.Expanded (not $.Traced)}} + [Traced/Summary] + {{else}} + [Traced/Summary] + {{end}} + {{if or (not $.Expanded) (not $.Traced)}} + [Traced/Expanded] + {{else}} + [Traced/Expanded] + {{end}} +{{end}} + +{{if $.Total}} +

Showing {{len $.Traces}} of {{$.Total}} traces.

+{{end}} + + + + + {{range $tr := $.Traces}} + + + + + {{/* TODO: include traceID/spanID */}} + + {{if $.Expanded}} + {{range $tr.Events}} + + + + + + {{end}} + {{end}} + {{end}} +
+ {{if $.Active}}Active{{else}}Completed{{end}} Requests +
WhenElapsed (s)
{{$tr.When}}{{$tr.ElapsedTime}}{{$tr.Title}}
{{.WhenString}}{{elapsed .Elapsed}}{{if or $.ShowSensitive (not .Sensitive)}}... {{.What}}{{else}}[redacted]{{end}}
+{{end}} {{/* if $.Traces */}} + +{{if $.Histogram}} +

Latency (µs) of {{$.Family}} over {{$.HistogramWindow}}

+{{$.Histogram}} +{{end}} {{/* if $.Histogram */}} + + + +{{end}} {{/* end of Epilog */}} +` diff --git a/vendor/golang.org/x/oauth2/clientcredentials/clientcredentials.go b/vendor/golang.org/x/oauth2/clientcredentials/clientcredentials.go new file mode 100644 index 0000000000..7a0b9ed102 --- /dev/null +++ b/vendor/golang.org/x/oauth2/clientcredentials/clientcredentials.go @@ -0,0 +1,120 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package clientcredentials implements the OAuth2.0 "client credentials" token flow, +// also known as the "two-legged OAuth 2.0". +// +// This should be used when the client is acting on its own behalf or when the client +// is the resource owner. It may also be used when requesting access to protected +// resources based on an authorization previously arranged with the authorization +// server. +// +// See https://tools.ietf.org/html/rfc6749#section-4.4 +package clientcredentials // import "golang.org/x/oauth2/clientcredentials" + +import ( + "context" + "fmt" + "net/http" + "net/url" + "strings" + + "golang.org/x/oauth2" + "golang.org/x/oauth2/internal" +) + +// Config describes a 2-legged OAuth2 flow, with both the +// client application information and the server's endpoint URLs. +type Config struct { + // ClientID is the application's ID. + ClientID string + + // ClientSecret is the application's secret. + ClientSecret string + + // TokenURL is the resource server's token endpoint + // URL. This is a constant specific to each server. + TokenURL string + + // Scope specifies optional requested permissions. + Scopes []string + + // EndpointParams specifies additional parameters for requests to the token endpoint. + EndpointParams url.Values + + // AuthStyle optionally specifies how the endpoint wants the + // client ID & client secret sent. The zero value means to + // auto-detect. 
+ AuthStyle oauth2.AuthStyle +} + +// Token uses client credentials to retrieve a token. +// +// The provided context optionally controls which HTTP client is used. See the oauth2.HTTPClient variable. +func (c *Config) Token(ctx context.Context) (*oauth2.Token, error) { + return c.TokenSource(ctx).Token() +} + +// Client returns an HTTP client using the provided token. +// The token will auto-refresh as necessary. +// +// The provided context optionally controls which HTTP client +// is returned. See the oauth2.HTTPClient variable. +// +// The returned Client and its Transport should not be modified. +func (c *Config) Client(ctx context.Context) *http.Client { + return oauth2.NewClient(ctx, c.TokenSource(ctx)) +} + +// TokenSource returns a TokenSource that returns t until t expires, +// automatically refreshing it as necessary using the provided context and the +// client ID and client secret. +// +// Most users will use Config.Client instead. +func (c *Config) TokenSource(ctx context.Context) oauth2.TokenSource { + source := &tokenSource{ + ctx: ctx, + conf: c, + } + return oauth2.ReuseTokenSource(nil, source) +} + +type tokenSource struct { + ctx context.Context + conf *Config +} + +// Token refreshes the token by using a new client credentials request. +// tokens received this way do not include a refresh token +func (c *tokenSource) Token() (*oauth2.Token, error) { + v := url.Values{ + "grant_type": {"client_credentials"}, + } + if len(c.conf.Scopes) > 0 { + v.Set("scope", strings.Join(c.conf.Scopes, " ")) + } + for k, p := range c.conf.EndpointParams { + // Allow grant_type to be overridden to allow interoperability with + // non-compliant implementations. 
+ if _, ok := v[k]; ok && k != "grant_type" { + return nil, fmt.Errorf("oauth2: cannot overwrite parameter %q", k) + } + v[k] = p + } + + tk, err := internal.RetrieveToken(c.ctx, c.conf.ClientID, c.conf.ClientSecret, c.conf.TokenURL, v, internal.AuthStyle(c.conf.AuthStyle)) + if err != nil { + if rErr, ok := err.(*internal.RetrieveError); ok { + return nil, (*oauth2.RetrieveError)(rErr) + } + return nil, err + } + t := &oauth2.Token{ + AccessToken: tk.AccessToken, + TokenType: tk.TokenType, + RefreshToken: tk.RefreshToken, + Expiry: tk.Expiry, + } + return t.WithExtra(tk.Raw), nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 4e28f9d137..ede53d519c 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -48,6 +48,8 @@ github.com/googleapis/gnostic/openapiv2 github.com/imdario/mergo # github.com/inconshreveable/mousetrap v1.0.0 github.com/inconshreveable/mousetrap +# github.com/jpillora/backoff v1.0.0 +github.com/jpillora/backoff # github.com/json-iterator/go v1.1.11 github.com/json-iterator/go # github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 @@ -56,6 +58,8 @@ github.com/matttproud/golang_protobuf_extensions/pbutil github.com/modern-go/concurrent # github.com/modern-go/reflect2 v1.0.1 github.com/modern-go/reflect2 +# github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f +github.com/mwitkow/go-conntrack # github.com/openshift/api v0.0.0-20210923172539-00988ef88ee0 ## explicit github.com/openshift/api/config/v1 @@ -95,6 +99,8 @@ github.com/openshift/library-go/pkg/verify/util github.com/pkg/errors # github.com/prometheus/client_golang v1.11.0 ## explicit +github.com/prometheus/client_golang/api +github.com/prometheus/client_golang/api/prometheus/v1 github.com/prometheus/client_golang/prometheus github.com/prometheus/client_golang/prometheus/internal github.com/prometheus/client_golang/prometheus/promhttp @@ -102,6 +108,8 @@ github.com/prometheus/client_golang/prometheus/promhttp ## explicit 
github.com/prometheus/client_model/go # github.com/prometheus/common v0.26.0 +## explicit +github.com/prometheus/common/config github.com/prometheus/common/expfmt github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg github.com/prometheus/common/model @@ -131,8 +139,11 @@ golang.org/x/net/http/httpproxy golang.org/x/net/http2 golang.org/x/net/http2/hpack golang.org/x/net/idna +golang.org/x/net/internal/timeseries +golang.org/x/net/trace # golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d golang.org/x/oauth2 +golang.org/x/oauth2/clientcredentials golang.org/x/oauth2/internal # golang.org/x/sys v0.0.0-20210616094352-59db8d763f22 golang.org/x/sys/internal/unsafeheader From ca186eda342785b2d2297740ceda92b3cd12d929 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 10 Nov 2021 21:57:09 -0800 Subject: [PATCH 7/7] pkg/clusterconditions/cache: Add a cache wrapper for client-side throttling Per [1]: Additionally, the operator will continually re-evaluate the blocking conditionals in conditionalUpdates and update conditionalUpdates[].risks accordingly. The timing of the evaluation and freshness are largely internal details, but to avoid consuming excessive monitoring resources and because the rules should be based on slowly-changing state, the operator will handle polling with the following restrictions: * The cluster-version operator will cache polling results for each query, so a single query which is used in evaluating multiple risks over multiple conditional update targets will only be evaluated once per round. * After evaluating a PromQL query, the cluster-version operator will wait at least 10 minutes before evaluating any PromQL. This delay will not be persisted between operator restarts, so a crash-looping CVO may result in higher PromQL load. But a crash-looping CVO will also cause the KubePodCrashLooping alert to fire, which will summon the cluster administrator. 
 * After evaluating a PromQL query, the cluster-version operator will wait at least an hour before evaluating that PromQL query again. That's what this commit sets up. The tests are a bit fiddly, since I wanted to exercise "I have so many queries that I'd like to run, and they're expiring before I can get through them all". I'm trying to show that if you give it enough tries, we won't consistently starve out surprisingly many conditions, even though in that overloaded case, someone is always getting starved out. Unlikely to happen in the wild, but the enhancement section is intentionally addressing the "what if some malicious/misconfigured graph floods the CVO with PromQL suggestions?". [1]: https://github.com/openshift/enhancements/blob/2cc2d9b331532c852878a7c793f3a754914c824e/enhancements/update/targeted-update-edge-blocking.md#cluster-version-operator-support-for-the-enhanced-schema --- pkg/clusterconditions/cache/cache.go | 202 +++++++++++++++++++++ pkg/clusterconditions/cache/cache_test.go | 207 ++++++++++++++++++++++ pkg/clusterconditions/mock/mock.go | 78 ++++++++ pkg/clusterconditions/mock/mock_test.go | 134 ++++++++++++++ pkg/clusterconditions/promql/promql.go | 24 ++- 5 files changed, 636 insertions(+), 9 deletions(-) create mode 100644 pkg/clusterconditions/cache/cache.go create mode 100644 pkg/clusterconditions/cache/cache_test.go create mode 100644 pkg/clusterconditions/mock/mock.go create mode 100644 pkg/clusterconditions/mock/mock_test.go diff --git a/pkg/clusterconditions/cache/cache.go b/pkg/clusterconditions/cache/cache.go new file mode 100644 index 0000000000..393ef06590 --- /dev/null +++ b/pkg/clusterconditions/cache/cache.go @@ -0,0 +1,202 @@ +// Package cache implements a throttled, caching condition. 
+package cache + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "time" + + configv1 "github.com/openshift/api/config/v1" + "k8s.io/klog/v2" + + "github.com/openshift/cluster-version-operator/pkg/clusterconditions" +) + +// MatchResult represents a single Match invocation. +type MatchResult struct { + // When is the completion time of the wrapped Match call. + When time.Time + + // Access is the time of the most recent access. + Access time.Time + + // Match is whether the result was a match or not. + Match bool + + // Error is the error returned by the Match call. + Error error +} + +// Cache wraps a cluster condition with caching and throttling. +type Cache struct { + // Condition is the wrapped cluster condition. + Condition clusterconditions.Condition + + // LastMatch is the completion time of the most recent Match + // call evaluated by the wrapped cluster condition. + LastMatch time.Time + + // MinBetweenMatches is the minimum duration before a new + // Match call may be evaluated by the wrapped cluster condition. + MinBetweenMatches time.Duration + + // MinForCondition is the minimum duration before a new Match + // call for a given condition may be evaluated. + MinForCondition time.Duration + + // Expiration is the duration a value will be cached before + // being evicted for stale-ness. + Expiration time.Duration + + // MatchResults holds results of previous match invocations. + MatchResults map[string]*MatchResult +} + +// Valid returns an error if the wrapped cluster condition considers +// the value invalid. +func (c *Cache) Valid(ctx context.Context, condition *configv1.ClusterCondition) error { + return c.Condition.Valid(ctx, condition) +} + +// Match returns the match value from the wrapped cluster condition, +// possibly from a fresh evaluation, possibly from a local cache. 
+func (c *Cache) Match(ctx context.Context, condition *configv1.ClusterCondition) (bool, error) { + if c.MatchResults == nil { + c.MatchResults = map[string]*MatchResult{} + } + + keyBytes, err := json.Marshal(condition) + if err != nil { + return false, fmt.Errorf("unable to marshal condition to JSON for use as a cache key: %w", err) + } + key := string(keyBytes) + + now := time.Now() + + defer func() { + if result, ok := c.MatchResults[key]; ok { + result.Access = now + } else { + c.MatchResults[key] = &MatchResult{Access: now} + } + }() + + c.expireStaleMatchResults(ctx, now) + + sinceLastMatch := now.Sub(c.LastMatch) + if sinceLastMatch <= c.MinBetweenMatches { + if result, ok := c.MatchResults[key]; ok && !result.When.IsZero() { + return result.Match, result.Error + } + return false, fmt.Errorf("client-side throttling: only %s has elapsed since the last match call completed for this cluster condition backend; this cached cluster condition request has been queued for later execution", sinceLastMatch) + } + + // If we only attempt to evaluate the requested condition, and + // the callers request conditions in batches with the same + // order, we might continually evaluate the early conditions + // while starving out later conditions. Instead, spend our + // Match call on the most stale condition. 
+ thiefKey, targetCondition, err := c.calculateMostStale(ctx, now) + if err != nil { + return false, fmt.Errorf("calculating the most stale cached cluster-condition match entry: %w", err) + } + + if thiefKey == "" { + thiefKey = key // cache is empty, or no access since last evaluation, so evaluate the requested condition + targetCondition = condition + } + + if targetCondition == nil { + if result, ok := c.MatchResults[key]; ok && !result.When.IsZero() { + return result.Match, result.Error + } + var detail string + if thiefResult, ok := c.MatchResults[thiefKey]; ok { + detail = fmt.Sprintf(" (last evaluated on %s)", thiefResult.When) + } + klog.V(4).Infof("%s is the most stale cached cluster-condition match entry, but it is too fresh%s. However, we don't have a cached evaluation for %s, so attempt to evaluate that now.", thiefKey, detail, key) + } + + // if we ended up stealing this Match call, log that, to make contention more clear + if thiefKey != key { + var reason string + if thiefResult, ok := c.MatchResults[thiefKey]; !ok || thiefResult.When.IsZero() { + reason = "it has never been evaluated" + } else { + reason = fmt.Sprintf("its last evaluation completed %s ago", now.Sub(thiefResult.When)) + } + klog.V(4).Infof("%s is stealing this cluster-condition match call for %s, because %s", thiefKey, key, reason) + } + + match, err := c.Condition.Match(ctx, targetCondition) + now = time.Now() + c.LastMatch = now + if _, ok := c.MatchResults[thiefKey]; !ok { + c.MatchResults[thiefKey] = &MatchResult{} + } + result := c.MatchResults[thiefKey] + result.When = now + result.Match = match + result.Error = err + + if result, ok := c.MatchResults[key]; ok && !result.When.IsZero() { + return result.Match, result.Error + } + return false, errors.New("client-side throttling: this cached cluster condition request has been queued for later execution") +} + +// expireStaleMatchResults removes entries from MatchResults if their +// last-evaluation When is more than Expiration 
ago. For MatchResults +// entries which have never been evaluated, the last-request Access +// time is used instead. +func (c *Cache) expireStaleMatchResults(ctx context.Context, now time.Time) { + for key, value := range c.MatchResults { + age := now.Sub(value.When) + aspect := "result" + if value.When.IsZero() { + age = now.Sub(value.Access) + aspect = "queued request" + } + if age > c.Expiration { + klog.V(4).Infof("pruning %q from the condition cache, as the %s is %s old", key, aspect, age) + delete(c.MatchResults, key) + } + } +} + +// calculateMostStale returns the most-stale entry in the cache, or +// nil if the most-stale entry has been evaluated more recently than +// MinForCondition ago. +func (c *Cache) calculateMostStale(ctx context.Context, now time.Time) (string, *configv1.ClusterCondition, error) { + var thiefKey string + var thiefResult *MatchResult + for candidateKey, value := range c.MatchResults { + if !value.Access.After(value.When) { + continue // no requests since its last wrapped Match call + } + if thiefResult == nil || + value.When.Before(thiefResult.When) || // refresh the most stale + (value.When == thiefResult.When && value.Access.Before(thiefResult.Access)) { // break ties in favor of the longest-queued + thiefKey = candidateKey + thiefResult = c.MatchResults[candidateKey] + } + } + + if thiefKey == "" { // empty cache, or no access since last evaluation + return "", nil, nil + } + + if thiefResult != nil && now.Sub(thiefResult.When) < c.MinForCondition { + // thief is our most stale, and it's still too fresh to Match again. Just return from the cache if we have a cached result. 
+ return thiefKey, nil, nil + } + + var targetCondition *configv1.ClusterCondition + if err := json.Unmarshal([]byte(thiefKey), &targetCondition); err != nil { + delete(c.MatchResults, thiefKey) + return "", nil, fmt.Errorf("%s is the most stale cached cluster-condition match entry, but key is invalid JSON: %v", thiefKey, err) + } + return thiefKey, targetCondition, nil +} diff --git a/pkg/clusterconditions/cache/cache_test.go b/pkg/clusterconditions/cache/cache_test.go new file mode 100644 index 0000000000..b19079c3c5 --- /dev/null +++ b/pkg/clusterconditions/cache/cache_test.go @@ -0,0 +1,207 @@ +package cache + +import ( + "context" + "fmt" + "reflect" + "regexp" + "testing" + "time" + + configv1 "github.com/openshift/api/config/v1" + + "github.com/openshift/cluster-version-operator/pkg/clusterconditions/mock" +) + +func TestCache(t *testing.T) { + ctx := context.Background() + m := &mock.Mock{} + c := &Cache{ + Condition: m, + MinBetweenMatches: 225 * time.Millisecond, + MinForCondition: time.Second, // so 4 requests before we can repeat a previous condition + Expiration: 2 * time.Second, // so ~8 conditions before expiration overtakes refreshing + } + + minConcurrence := 2 + maxConcurrence := 10 + lowConcurrenceRounds := 5 + sleepBetweenRounds := 100 * time.Millisecond + highConcurrenceRounds := maxConcurrence * 2 * int(c.Expiration) / int(sleepBetweenRounds) // long enough for a round of expiration and recovery + tailRounds := 2 * int(c.Expiration) / int(sleepBetweenRounds) // long enough to clear out all the high-concurrency conditions + totalRounds := lowConcurrenceRounds + highConcurrenceRounds + tailRounds + success := make(map[int]bool, maxConcurrence) + for i := 0; i < totalRounds; i++ { + if i > 0 { + time.Sleep(sleepBetweenRounds) + } + + concurrent := minConcurrence + if i >= lowConcurrenceRounds && i < lowConcurrenceRounds+highConcurrenceRounds { + concurrent = maxConcurrence + } + for j := 0; j < concurrent; j++ { + name := fmt.Sprintf("condition 
%d", j) + condition := configv1.ClusterCondition{Type: name} + m.MatchQueue = append(m.MatchQueue, mock.MatchResult{Match: true, Error: nil}) + match, err := c.Match(ctx, &condition) + t.Logf("%s round %d, %s -> %t %v", time.Now(), i, name, match, err) + if err == nil { + success[j] = true + } + } + if i == lowConcurrenceRounds-1 || i == lowConcurrenceRounds+highConcurrenceRounds-1 || i == totalRounds-1 { + successful := 0 + for k := 0; k < concurrent; k++ { + if success[k] { + successful++ + success[k] = false + } else { + t.Logf("failed to achieve expected success for condition %d during round %d with %d concurrency", k, i, concurrent) + } + } + if successful < concurrent && successful < 7 { + t.Errorf("only %d successfully evaluated conditions during round %d with %d concurrency", successful, i, concurrent) + } + } + } + + for i, call := range m.Calls { + t.Logf("call %d, %s %v", i, call.When, call.Condition) + } + + minSpaceBetweenCalls := m.Calls[1].When.Sub(m.Calls[0].When) + maxSpaceBetweenCalls := minSpaceBetweenCalls + for i := 2; i < len(m.Calls); i++ { + spaceBetweenCalls := m.Calls[i].When.Sub(m.Calls[i-1].When) + if spaceBetweenCalls < minSpaceBetweenCalls { + minSpaceBetweenCalls = spaceBetweenCalls + } else if spaceBetweenCalls > maxSpaceBetweenCalls { + maxSpaceBetweenCalls = spaceBetweenCalls + } + } + if minSpaceBetweenCalls < c.MinBetweenMatches { + t.Errorf("the minimum space between calls of %s violated the configured minimum of %s", minSpaceBetweenCalls, c.MinBetweenMatches) + } else { + t.Logf("minimum space between Match calls was %s, which complies with the configured minimum of %s", minSpaceBetweenCalls, c.MinBetweenMatches) + } + expectedMaximumSpaceBetweenCalls := c.MinForCondition + sleepBetweenRounds + 50*time.Millisecond + if maxSpaceBetweenCalls > expectedMaximumSpaceBetweenCalls { + t.Errorf("the maximum space between calls of %s exceeded the expected maximum of %s", maxSpaceBetweenCalls, expectedMaximumSpaceBetweenCalls) + } else { 
+ t.Logf("maximum space between Match calls was %s, which complies with the expected maximum of %s", maxSpaceBetweenCalls, expectedMaximumSpaceBetweenCalls) + } + + for i := 0; i < maxConcurrence; i++ { + name := fmt.Sprintf("condition %d", i) + condition := configv1.ClusterCondition{Type: name} + var previousCall *mock.Call + for j, call := range m.Calls { + if reflect.DeepEqual(call.Condition, condition) { + if previousCall != nil { + spaceBetweenCalls := call.When.Sub(previousCall.When) + if spaceBetweenCalls < c.MinForCondition { + t.Errorf("the space between %s calls of %s violated the configured minimum of %s", name, spaceBetweenCalls, c.MinForCondition) + } + } + previousCall = &m.Calls[j] + } + } + } +} + +func Test_calculateMostStale(t *testing.T) { + ctx := context.Background() + now := time.Now() + for _, testCase := range []struct { + name string + cache map[string]*MatchResult + expectedKey string + expectedCondition *configv1.ClusterCondition + expectedError *regexp.Regexp + }{ + { + name: "empty cache", + cache: map[string]*MatchResult{}, + }, + { + name: "single entry, invalid key", + cache: map[string]*MatchResult{ + "a": {Access: now, Match: true, Error: nil}, + }, + expectedError: regexp.MustCompile(`^a is the most stale cached cluster-condition match entry, but key is invalid JSON: .*`), + }, + { + name: "single entry, never evaluated", + cache: map[string]*MatchResult{ + `{"type": "a"}`: {Access: now, Match: true, Error: nil}, + }, + expectedKey: `{"type": "a"}`, + expectedCondition: &configv1.ClusterCondition{Type: "a"}, + }, + { + name: "single entry, no access since old evaluation", + cache: map[string]*MatchResult{ + `{"type": "a"}`: {When: now.Add(-time.Hour), Access: now.Add(-time.Hour), Match: true, Error: nil}, + }, + }, + { + name: "single entry, has access since old evaluation", + cache: map[string]*MatchResult{ + `{"type": "a"}`: {When: now.Add(-time.Hour), Access: now, Match: true, Error: nil}, + }, + expectedKey: `{"type": "a"}`, + 
expectedCondition: &configv1.ClusterCondition{Type: "a"}, + }, + { + name: "single entry, has access since new evaluation", + cache: map[string]*MatchResult{ + `{"type": "a"}`: {When: now.Add(-time.Minute), Access: now, Match: true, Error: nil}, + }, + expectedKey: `{"type": "a"}`, + }, + { + name: "two entries, both old evaluatations, clear evaluation winner", + cache: map[string]*MatchResult{ + `{"type": "a"}`: {When: now.Add(-2 * time.Hour), Access: now, Match: true, Error: nil}, + `{"type": "b"}`: {When: now.Add(-time.Hour), Access: now.Add(-time.Minute), Match: true, Error: nil}, + }, + expectedKey: `{"type": "a"}`, + expectedCondition: &configv1.ClusterCondition{Type: "a"}, + }, + { + name: "two entries, both old evaluations, tied evaluation, access winner", + cache: map[string]*MatchResult{ + `{"type": "a"}`: {When: now.Add(-time.Hour), Access: now.Add(-time.Minute), Match: true, Error: nil}, + `{"type": "b"}`: {When: now.Add(-time.Hour), Match: true, Error: nil}, + }, + expectedKey: `{"type": "a"}`, + expectedCondition: &configv1.ClusterCondition{Type: "a"}, + }, + } { + t.Run(testCase.name, func(t *testing.T) { + c := Cache{ + MinForCondition: 30 * time.Minute, + MatchResults: testCase.cache, + } + key, condition, err := c.calculateMostStale(ctx, now) + + if key != testCase.expectedKey { + t.Errorf("got key %q but expected %q", key, testCase.expectedKey) + } + + if !reflect.DeepEqual(condition, testCase.expectedCondition) { + t.Errorf("got condition %v but expected %v", condition, testCase.expectedCondition) + } + + if err != nil && testCase.expectedError == nil { + t.Errorf("unexpected error: %v", err) + } else if testCase.expectedError != nil && err == nil { + t.Errorf("unexpected success, expected: %s", testCase.expectedError) + } else if testCase.expectedError != nil && !testCase.expectedError.MatchString(err.Error()) { + t.Errorf("expected error %s, not: %v", testCase.expectedError, err) + } + }) + } + +} diff --git 
a/pkg/clusterconditions/mock/mock.go b/pkg/clusterconditions/mock/mock.go
new file mode 100644
index 0000000000..bede44d735
--- /dev/null
+++ b/pkg/clusterconditions/mock/mock.go
@@ -0,0 +1,78 @@
+// Package mock implements a cluster condition with mock responses,
+// for convenient testing.
+package mock
+
+import (
+	"context"
+	"errors"
+	"time"
+
+	configv1 "github.com/openshift/api/config/v1"
+)
+
+// MatchResult represents the response to a single Match invocation.
+type MatchResult struct {
+	// Match is whether the result was a match or not.
+	Match bool
+
+	// Error is the error returned by the Match call.
+	Error error
+}
+
+// Call records a call to a cluster condition method.
+type Call struct {
+	// When records the time of the method call.
+	When time.Time
+
+	// Method records the method name.
+	Method string
+
+	// Condition records the condition configuration passed to the method call.
+	Condition configv1.ClusterCondition
+}
+
+// Mock implements a cluster condition with mock responses.
+type Mock struct {
+	// ValidQueue is a set of responses queued for Valid calls.
+	ValidQueue []error
+
+	// MatchQueue is a set of responses queued for Match calls.
+	MatchQueue []MatchResult
+
+	// Calls records calls to the cluster condition.
+	Calls []Call
+}
+
+// Valid returns an error popped from ValidQueue.
+func (m *Mock) Valid(ctx context.Context, condition *configv1.ClusterCondition) error {
+	m.Calls = append(m.Calls, Call{
+		When:      time.Now(),
+		Method:    "Valid",
+		Condition: *condition,
+	})
+
+	if len(m.ValidQueue) == 0 {
+		return errors.New("the mock's ValidQueue stack is empty")
+	}
+
+	result := m.ValidQueue[0]
+	m.ValidQueue = m.ValidQueue[1:]
+	return result
+}
+
+// Match returns a match result popped from MatchQueue.
+func (m *Mock) Match(ctx context.Context, condition *configv1.ClusterCondition) (bool, error) { + m.Calls = append(m.Calls, Call{ + When: time.Now(), + Method: "Match", + Condition: *condition, + }) + + if len(m.MatchQueue) == 0 { + return false, errors.New("the mock's MatchQueue stack is empty") + } + + result := m.MatchQueue[0] + m.MatchQueue = m.MatchQueue[1:] + return result.Match, result.Error +} diff --git a/pkg/clusterconditions/mock/mock_test.go b/pkg/clusterconditions/mock/mock_test.go new file mode 100644 index 0000000000..b6bec9e9da --- /dev/null +++ b/pkg/clusterconditions/mock/mock_test.go @@ -0,0 +1,134 @@ +package mock_test + +import ( + "context" + "errors" + "fmt" + "reflect" + "regexp" + "testing" + "time" + + configv1 "github.com/openshift/api/config/v1" + + "github.com/openshift/cluster-version-operator/pkg/clusterconditions/mock" +) + +func TestMock(t *testing.T) { + ctx := context.Background() + m := &mock.Mock{ + ValidQueue: []error{nil, errors.New("error a"), errors.New("error b")}, + MatchQueue: []mock.MatchResult{ + { + Match: true, + Error: nil, + }, + { + Match: false, + Error: errors.New("error c"), + }, + { + Match: false, + Error: nil, + }, + }, + } + + for i, expectedError := range []*regexp.Regexp{ + nil, + regexp.MustCompile("^error a$"), + regexp.MustCompile("^error b$"), + regexp.MustCompile("^the mock's ValidQueue stack is empty$"), + } { + name := fmt.Sprintf("Valid call %d", i) + t.Run(name, func(t *testing.T) { + condition := configv1.ClusterCondition{Type: name} + + before := time.Now() + err := m.Valid(ctx, &condition) + after := time.Now() + + if err != nil && expectedError == nil { + t.Errorf("unexpected error: %v", err) + } else if expectedError != nil && err == nil { + t.Errorf("unexpected success, expected: %s", expectedError) + } else if expectedError != nil && !expectedError.MatchString(err.Error()) { + t.Errorf("expected error %s, not: %v", expectedError, err) + } + + if len(m.Calls) == 0 { + t.Fatal("mock call was 
not logged") + } + logged := m.Calls[len(m.Calls)-1] + if logged.When.Before(before) { + t.Errorf("logged time %s but called after %s", logged.When, before) + } else if logged.When.After(after) { + t.Errorf("logged time %s but call completed by %s", logged.When, after) + } + if logged.Method != "Valid" { + t.Errorf("logged method %q but expected Valid", logged.Method) + } + if !reflect.DeepEqual(logged.Condition, condition) { + t.Errorf("logged condition %v but expected %v", logged.Condition, condition) + } + }) + } + + for i, testCase := range []struct { + expectedMatch bool + expectedError *regexp.Regexp + }{ + { + expectedMatch: true, + expectedError: nil, + }, + { + expectedMatch: false, + expectedError: regexp.MustCompile("^error c$"), + }, + { + expectedMatch: false, + expectedError: nil, + }, + { + expectedMatch: false, + expectedError: regexp.MustCompile("^the mock's MatchQueue stack is empty$"), + }, + } { + name := fmt.Sprintf("Match call %d", i) + t.Run(name, func(t *testing.T) { + condition := configv1.ClusterCondition{Type: name} + + before := time.Now() + match, err := m.Match(ctx, &condition) + after := time.Now() + + if match != testCase.expectedMatch { + t.Errorf("got match %t but expected %t", match, testCase.expectedMatch) + } + if err != nil && testCase.expectedError == nil { + t.Errorf("unexpected error: %v", err) + } else if testCase.expectedError != nil && err == nil { + t.Errorf("unexpected success, expected: %s", testCase.expectedError) + } else if testCase.expectedError != nil && !testCase.expectedError.MatchString(err.Error()) { + t.Errorf("expected error %s, not: %v", testCase.expectedError, err) + } + + if len(m.Calls) == 0 { + t.Fatal("mock call was not logged") + } + logged := m.Calls[len(m.Calls)-1] + if logged.When.Before(before) { + t.Errorf("logged time %s but called after %s", logged.When, before) + } else if logged.When.After(after) { + t.Errorf("logged time %s but call completed by %s", logged.When, after) + } + if 
logged.Method != "Match" { + t.Errorf("logged method %q but expected Match", logged.Method) + } + if !reflect.DeepEqual(logged.Condition, condition) { + t.Errorf("logged condition %v but expected %v", logged.Condition, condition) + } + }) + } +} diff --git a/pkg/clusterconditions/promql/promql.go b/pkg/clusterconditions/promql/promql.go index 4263cedc55..1e0359c48a 100644 --- a/pkg/clusterconditions/promql/promql.go +++ b/pkg/clusterconditions/promql/promql.go @@ -17,6 +17,7 @@ import ( "k8s.io/klog/v2" "github.com/openshift/cluster-version-operator/pkg/clusterconditions" + "github.com/openshift/cluster-version-operator/pkg/clusterconditions/cache" ) // PromQL implements a cluster condition that matches based on PromQL. @@ -28,17 +29,22 @@ type PromQL struct { HTTPClientConfig config.HTTPClientConfig } -var promql = &PromQL{ - Address: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091", - HTTPClientConfig: config.HTTPClientConfig{ - Authorization: &config.Authorization{ - Type: "Bearer", - CredentialsFile: "/var/run/secrets/kubernetes.io/serviceaccount/token", - }, - TLSConfig: config.TLSConfig{ - CAFile: "/etc/tls/service-ca/service-ca.crt", +var promql = &cache.Cache{ + Condition: &PromQL{ + Address: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091", + HTTPClientConfig: config.HTTPClientConfig{ + Authorization: &config.Authorization{ + Type: "Bearer", + CredentialsFile: "/var/run/secrets/kubernetes.io/serviceaccount/token", + }, + TLSConfig: config.TLSConfig{ + CAFile: "/etc/tls/service-ca/service-ca.crt", + }, }, }, + MinBetweenMatches: 10 * time.Minute, + MinForCondition: time.Hour, + Expiration: 24 * time.Hour, } // Valid returns an error if the condition contains any properties