diff --git a/.circleci/config.yml b/.circleci/config.yml index 5a18a4a0689..b5ae35cbdd4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -2,14 +2,15 @@ version: 2.1 orbs: - prometheus: prometheus/prometheus@0.1.0 + prometheus: prometheus/prometheus@0.3.0 + go: circleci/go@0.2.0 executors: # Whenever the Go version is updated here, .travis.yml and .promu.yml # should also be updated. golang: docker: - - image: circleci/golang:1.13 + - image: circleci/golang:1.13-node fuzzit: docker: @@ -21,10 +22,16 @@ jobs: steps: - prometheus/setup_environment + - go/load-cache: + key: v1 + - restore_cache: + keys: + - v1-npm-deps-{{ checksum "web/ui/react-app/yarn.lock" }} + - v1-npm-deps- - run: command: make environment: - # Run garbage collection more aggresively to avoid getting OOMed during the lint phase. + # Run garbage collection more aggressively to avoid getting OOMed during the lint phase. GOGC: "20" # By default Go uses GOMAXPROCS but a Circle CI executor has many # cores (> 30) while the CPU and RAM resources are throttled. 
If we @@ -36,6 +43,12 @@ jobs: file: prometheus - prometheus/store_artifact: file: promtool + - go/save-cache: + key: v1 + - save_cache: + key: v1-npm-deps-{{ checksum "web/ui/react-app/yarn.lock" }} + paths: + - web/ui/react-app/node_modules fuzzit_regression: executor: fuzzit working_directory: /go/src/github.com/prometheus/prometheus @@ -82,6 +95,7 @@ workflows: filters: branches: only: master + image: circleci/golang:1-node - prometheus/publish_release: context: org-context requires: @@ -92,6 +106,7 @@ workflows: only: /^v[0-9]+(\.[0-9]+){2}(-.+|[^-.]*)$/ branches: ignore: /.*/ + image: circleci/golang:1-node nightly: triggers: - schedule: diff --git a/.gitignore b/.gitignore index de38f8dc750..2a0cffbf377 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,9 @@ benchmark.txt !/.golangci.yml /documentation/examples/remote_storage/remote_storage_adapter/remote_storage_adapter /documentation/examples/remote_storage/example_write_adapter/example_writer_adapter + +npm_licenses.tar.bz2 +/web/ui/static/react +# NOTE(spasquie): unlike upstream, we want the assets to be committed in the repository because build environments don't have access to Yarn and external package repositories. +# This means that every time we update to a new Prometheus version, 'make assets' should be run and the resulting file committed to the repository. +#/web/ui/assets_vfsdata.go diff --git a/.promu.yml b/.promu.yml index fca5b67b3dd..9258dbbaf71 100644 --- a/.promu.yml +++ b/.promu.yml @@ -12,7 +12,10 @@ build: path: ./cmd/promtool - name: tsdbtool path: ./tsdb/cmd/tsdb + # NOTE(spasquie): Go 1.12 doesn't support '-tags netgo,builtinassets' and promu doesn't support + # space-separated tags so the 'ondiskassets' build tag is used instead of 'builtinassets'. 
flags: -mod=vendor -a -tags netgo + #flags: -mod=vendor -a -tags netgo,builtinassets ldflags: | -s -X github.com/prometheus/common/version.Version={{.Version}} @@ -27,6 +30,7 @@ tarball: - documentation/examples/prometheus.yml - LICENSE - NOTICE + - npm_licenses.tar.bz2 crossbuild: platforms: - linux/amd64 diff --git a/.travis.yml b/.travis.yml index 0e9c5e80e4e..303517d9b40 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,8 +12,11 @@ go_import_path: github.com/prometheus/prometheus # random issues on Travis. before_install: - travis_retry make deps +- . $HOME/.nvm/nvm.sh +- nvm install stable +- nvm use stable - if [[ "$TRAVIS_OS_NAME" == "windows" ]]; then choco install make; fi script: -- make check_license style unused test lint check_assets +- make check_license style unused test lint - git diff --exit-code diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cc11a3a99d..08afc4c2060 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,21 @@ +## 2.14.0 / 2019-11-11 + +* [SECURITY/BUGFIX] UI: Ensure warnings from the API are escaped. #6279 +* [FEATURE] API: `/api/v1/status/runtimeinfo` and `/api/v1/status/buildinfo` endpoints added for use by the React UI. #6243 +* [FEATURE] React UI: implement the new experimental React based UI. #5694 and many more + * Can be found by under `/new`. + * Not all pages are implemented yet. +* [FEATURE] Status: Cardinality statistics added to the Runtime & Build Information page. #6125 +* [ENHANCEMENT/BUGFIX] Remote write: fix delays in remote write after a compaction. #6021 +* [ENHANCEMENT] UI: Alerts can be filtered by state. #5758 +* [BUGFIX] API: lifecycle endpoints return 403 when not enabled. #6057 +* [BUGFIX] Build: Fix Solaris build. #6149 +* [BUGFIX] Promtool: Remove false duplicate rule warnings when checking rule files with alerts. #6270 +* [BUGFIX] Remote write: restore use of deduplicating logger in remote write. #6113 +* [BUGFIX] Remote write: do not reshard when unable to send samples. 
#6111 +* [BUGFIX] Service discovery: errors are no longer logged on context cancellation. #6116, #6133 +* [BUGFIX] UI: handle null response from API properly. #6071 + ## 2.13.1 / 2019-10-16 * [BUGFIX] Fix panic in ARM builds of Prometheus. #6110 @@ -138,7 +156,7 @@ and the memory is available to the kernel when it needs it. * [BUGFIX] Check if label value is valid when unmarshaling external labels from YAML. #5316 * [BUGFIX] Promparse: sort all labels when parsing. #5372 * [BUGFIX] Reload rules: copy state on both name and labels. #5368 -* [BUGFIX] Exponentation operator to drop metric name in result of operation. #5329 +* [BUGFIX] Exponentiation operator to drop metric name in result of operation. #5329 * [BUGFIX] Config: resolve more file paths. #5284 * [BUGFIX] Promtool: resolve relative paths in alert test files. #5336 * [BUGFIX] Set TLSHandshakeTimeout in HTTP transport. common#179 diff --git a/Dockerfile b/Dockerfile index e42b18ef682..72dee1386e8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,9 @@ COPY .build/${OS}-${ARCH}/promtool /bin/promtool COPY documentation/examples/prometheus.yml /etc/prometheus/prometheus.yml COPY console_libraries/ /usr/share/prometheus/console_libraries/ COPY consoles/ /usr/share/prometheus/consoles/ +COPY LICENSE /LICENSE +COPY NOTICE /NOTICE +COPY npm_licenses.tar.bz2 /npm_licenses.tar.bz2 RUN ln -s /usr/share/prometheus/console_libraries /usr/share/prometheus/consoles/ /etc/prometheus/ RUN mkdir -p /prometheus && \ diff --git a/Dockerfile.ocp b/Dockerfile.ocp index b950bb3dc50..a7aced462dd 100644 --- a/Dockerfile.ocp +++ b/Dockerfile.ocp @@ -1,7 +1,13 @@ FROM registry.svc.ci.openshift.org/openshift/release:golang-1.12 AS builder WORKDIR /go/src/github.com/prometheus/prometheus COPY . . -RUN if yum install -y prometheus-promu; then export BUILD_PROMU=false; fi && make build +# NOTE(spasquie): the 'build' target regenerates the ReactJS code and the Go +# assets on the fly. 
Build environments don't have access to Yarn and external +# package repositories so we use the 'common-build' target instead. +# It means that every time we update to a new Prometheus version, 'make assets' +# should be run locally and the resulting file (web/ui/assets_vfsdata.go) +# should be committed to the repository. +RUN if yum install -y prometheus-promu; then export BUILD_PROMU=false; fi && make common-build FROM registry.svc.ci.openshift.org/openshift/origin-v4.0:base LABEL io.k8s.display-name="OpenShift Prometheus" \ diff --git a/MAINTAINERS.md b/MAINTAINERS.md index e00da702c07..a61d396a62f 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -3,4 +3,6 @@ Maintainers of this repository with their focus areas: * Brian Brazil @brian-brazil: Console templates; semantics of PromQL, service discovery, and relabeling. * Fabian Reinartz @fabxc: PromQL parsing and evaluation; implementation of retrieval, alert notification, and service discovery. * Julius Volz @juliusv: Remote storage integrations; web UI. +* Krasi Georgiev @krasi-georgiev: TSDB - the storage engine. +* Ganesh Vernekar @codesome: TSDB - the storage engine. 
diff --git a/Makefile b/Makefile index 11d4fa508e4..cad1dd7ad13 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,12 @@ # Needs to be defined before including Makefile.common to auto-generate targets DOCKER_ARCHS ?= amd64 armv7 arm64 +REACT_APP_PATH = web/ui/react-app +REACT_APP_SOURCE_FILES = $(wildcard $(REACT_APP_PATH)/public/* $(REACT_APP_PATH)/src/* $(REACT_APP_PATH)/tsconfig.json) +REACT_APP_OUTPUT_DIR = web/ui/static/react +REACT_APP_NODE_MODULES_PATH = $(REACT_APP_PATH)/node_modules +REACT_APP_NPM_LICENSES_TARBALL = "npm_licenses.tar.bz2" + TSDB_PROJECT_DIR = "./tsdb" TSDB_CLI_DIR="$(TSDB_PROJECT_DIR)/cmd/tsdb" TSDB_BIN = "$(TSDB_CLI_DIR)/tsdb" @@ -25,23 +31,60 @@ include Makefile.common DOCKER_IMAGE_NAME ?= prometheus +$(REACT_APP_NODE_MODULES_PATH): $(REACT_APP_PATH)/package.json $(REACT_APP_PATH)/yarn.lock + cd $(REACT_APP_PATH) && yarn --frozen-lockfile + +$(REACT_APP_OUTPUT_DIR): $(REACT_APP_NODE_MODULES_PATH) $(REACT_APP_SOURCE_FILES) + @echo ">> building React app" + @./scripts/build_react_app.sh + .PHONY: assets -assets: +assets: $(REACT_APP_OUTPUT_DIR) @echo ">> writing assets" - cd $(PREFIX)/web/ui && GO111MODULE=$(GO111MODULE) $(GO) generate -x -v $(GOOPTS) + # Un-setting GOOS and GOARCH here because the generated Go code is always the same, + # but the cached object code is incompatible between architectures and OSes (which + # breaks cross-building for different combinations on CI in the same container). + cd web/ui && GO111MODULE=$(GO111MODULE) GOOS= GOARCH= $(GO) generate -x -v $(GOOPTS) @$(GOFMT) -w ./web/ui -.PHONY: check_assets -check_assets: assets - @echo ">> checking that assets are up-to-date" - @if ! 
(cd $(PREFIX)/web/ui && git diff --exit-code); then \ - echo "Run 'make assets' and commit the changes to fix the error."; \ - exit 1; \ - fi +.PHONY: react-app-lint +react-app-lint: + @echo ">> running React app linting" + cd $(REACT_APP_PATH) && yarn lint:ci + +.PHONY: react-app-lint-fix +react-app-lint-fix: + @echo ">> running React app linting and fixing errors where possible" + cd $(REACT_APP_PATH) && yarn lint + +.PHONY: react-app-test +react-app-test: | $(REACT_APP_NODE_MODULES_PATH) react-app-lint + @echo ">> running React app tests" + cd $(REACT_APP_PATH) && yarn test --no-watch --coverage + +.PHONY: test +test: common-test react-app-test + +.PHONY: npm_licenses +npm_licenses: $(REACT_APP_NODE_MODULES_PATH) + @echo ">> bundling npm licenses" + rm -f $(REACT_APP_NPM_LICENSES_TARBALL) + find $(REACT_APP_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --transform 's/^/npm_licenses\//' --files-from=- + +.PHONY: tarball +tarball: npm_licenses common-tarball + +.PHONY: docker +docker: npm_licenses common-docker + +.PHONY: build +build: assets common-build +.PHONY: build_tsdb build_tsdb: GO111MODULE=$(GO111MODULE) $(GO) build -o $(TSDB_BIN) $(TSDB_CLI_DIR) +.PHONY: bench_tsdb bench_tsdb: build_tsdb @echo ">> running benchmark, writing result to $(TSDB_BENCHMARK_OUTPUT_DIR)" @$(TSDB_BIN) bench write --metrics=$(TSDB_BENCHMARK_NUM_METRICS) --out=$(TSDB_BENCHMARK_OUTPUT_DIR) $(TSDB_BENCHMARK_DATASET) diff --git a/NOTICE b/NOTICE index e36e57e5276..30ce2a82630 100644 --- a/NOTICE +++ b/NOTICE @@ -85,3 +85,9 @@ go-zookeeper - Native ZooKeeper client for Go https://github.com/samuel/go-zookeeper Copyright (c) 2013, Samuel Stauffer See https://github.com/samuel/go-zookeeper/blob/master/LICENSE for license details. + +We also use code from a large number of npm packages. 
For details, see: +- https://github.com/prometheus/prometheus/blob/master/web/ui/react-app/package.json +- https://github.com/prometheus/prometheus/blob/master/web/ui/react-app/package-lock.json +- The individual package licenses as copied from the node_modules directory can be found in + the npm_licenses.tar.bz2 archive in release tarballs and Docker images. diff --git a/README.md b/README.md index 3a167564562..ef0e3062ca8 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Prometheus +# Prometheus [![Build Status](https://travis-ci.org/prometheus/prometheus.svg)][travis] [![CircleCI](https://circleci.com/gh/prometheus/prometheus/tree/master.svg?style=shield)][circleci] @@ -16,7 +16,7 @@ from configured targets at given intervals, evaluates rule expressions, displays the results, and can trigger alerts if some condition is observed to be true. -Prometheus' main distinguishing features as compared to other monitoring systems are: +Prometheus's main distinguishing features as compared to other monitoring systems are: - a **multi-dimensional** data model (timeseries defined by metric name and set of key/value dimensions) - a **flexible query language** to leverage this dimensionality @@ -60,6 +60,8 @@ Prometheus will now be reachable at http://localhost:9090/. To build Prometheus from the source code yourself you need to have a working Go environment with [version 1.13 or greater installed](https://golang.org/doc/install). +You will also need to have [Node.js](https://nodejs.org/) and [Yarn](https://yarnpkg.com/) +installed in order to build the frontend assets. You can directly use the `go` tool to download and install the `prometheus` and `promtool` binaries into your `GOPATH`: @@ -67,7 +69,14 @@ and `promtool` binaries into your `GOPATH`: $ go get github.com/prometheus/prometheus/cmd/... 
$ prometheus --config.file=your_config.yml -You can also clone the repository yourself and build using `make`: +*However*, when using `go get` to build Prometheus, Prometheus will expect to be able to +read its web assets from local filesystem directories under `web/ui/static` and +`web/ui/templates`. In order for these assets to be found, you will have to run Prometheus +from the root of the cloned repository. Note also that these directories do not include the +new experimental React UI unless it has been built explicitly using `make assets` or `make build`. + +You can also clone the repository yourself and build using `make build`, which will compile in +the web assets so that Prometheus can be run from anywhere: $ mkdir -p $GOPATH/src/github.com/prometheus $ cd $GOPATH/src/github.com/prometheus @@ -78,12 +87,11 @@ You can also clone the repository yourself and build using `make`: The Makefile provides several targets: - * *build*: build the `prometheus` and `promtool` binaries + * *build*: build the `prometheus` and `promtool` binaries (includes building and compiling in web assets) * *test*: run the tests * *test-short*: run the short tests * *format*: format the source code * *vet*: check the source code for common errors - * *assets*: rebuild the static assets * *docker*: build a docker container for the current `HEAD` ## More information diff --git a/RELEASE.md b/RELEASE.md index 46a6d68ce95..b6ba52149bf 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -56,7 +56,7 @@ For a patch release, work in the branch of the minor release you want to patch. For a new major or minor release, create the corresponding release branch based on the master branch. -Bump the version in the `VERSION` file and update `CHANGELOG.md`. Do this in a proper PR as this gives others the opportunity to chime in on the release in general and on the addition to the changelog in particular. +Bump the version in the `VERSION` file and update `CHANGELOG.md`. 
Do this in a proper PR pointing to the release branch as this gives others the opportunity to chime in on the release in general and on the addition to the changelog in particular. Note that `CHANGELOG.md` should only document changes relevant to users of Prometheus, including external API changes, performance improvements, and new features. Do not document changes of internal interfaces, code refactorings and clean-ups, changes to the build process, etc. People interested in these are asked to refer to the git history. diff --git a/VERSION b/VERSION index 94f15e9cc30..edcfe40d198 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.13.1 +2.14.0 diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 1a620037c19..63eb445debc 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -324,19 +324,19 @@ type compareRuleType struct { label map[string]string } -func checkDuplicates(r []rulefmt.RuleGroup) []compareRuleType { +func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType { var duplicates []compareRuleType - for rindex := range r { - for index, props := range r[rindex].Rules { + for _, group := range groups { + for index, rule := range group.Rules { inst := compareRuleType{ - metric: props.Record, - label: props.Labels, + metric: ruleMetric(rule), + label: rule.Labels, } for i := 0; i < index; i++ { t := compareRuleType{ - metric: r[rindex].Rules[i].Record, - label: r[rindex].Rules[i].Labels, + metric: ruleMetric(group.Rules[i]), + label: group.Rules[i].Labels, } if reflect.DeepEqual(t, inst) { duplicates = append(duplicates, t) @@ -347,6 +347,13 @@ func checkDuplicates(r []rulefmt.RuleGroup) []compareRuleType { return duplicates } +func ruleMetric(rule rulefmt.Rule) string { + if rule.Alert != "" { + return rule.Alert + } + return rule.Record +} + var checkMetricsUsage = strings.TrimSpace(` Pass Prometheus metrics over stdin to lint them for consistency and correctness. 
diff --git a/discovery/consul/consul.go b/discovery/consul/consul.go index ccc09b072d7..f0db8d762b6 100644 --- a/discovery/consul/consul.go +++ b/discovery/consul/consul.go @@ -357,6 +357,13 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup. elapsed := time.Since(t0) rpcDuration.WithLabelValues("catalog", "services").Observe(elapsed.Seconds()) + // Check the context before in order to exit early. + select { + case <-ctx.Done(): + return + default: + } + if err != nil { level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err) rpcFailuresCount.Inc() diff --git a/discovery/dns/dns.go b/discovery/dns/dns.go index 8710bf3277f..014d5239e4c 100644 --- a/discovery/dns/dns.go +++ b/discovery/dns/dns.go @@ -151,7 +151,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { wg.Add(len(d.names)) for _, name := range d.names { go func(n string) { - if err := d.refreshOne(ctx, n, ch); err != nil { + if err := d.refreshOne(ctx, n, ch); err != nil && err != context.Canceled { level.Error(d.logger).Log("msg", "Error refreshing DNS targets", "err", err) } wg.Done() diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go index 83e39d973d9..93acec58457 100644 --- a/discovery/kubernetes/endpoints.go +++ b/discovery/kubernetes/endpoints.go @@ -128,7 +128,9 @@ func (e *Endpoints) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer e.queue.ShutDown() if !cache.WaitForCacheSync(ctx.Done(), e.endpointsInf.HasSynced, e.serviceInf.HasSynced, e.podInf.HasSynced) { - level.Error(e.logger).Log("msg", "endpoints informer unable to sync cache") + if ctx.Err() != context.Canceled { + level.Error(e.logger).Log("msg", "endpoints informer unable to sync cache") + } return } diff --git a/discovery/kubernetes/ingress.go b/discovery/kubernetes/ingress.go index 9ad4677db7f..10c729ede6f 100644 --- a/discovery/kubernetes/ingress.go +++ b/discovery/kubernetes/ingress.go @@ -70,7 +70,9 
@@ func (i *Ingress) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer i.queue.ShutDown() if !cache.WaitForCacheSync(ctx.Done(), i.informer.HasSynced) { - level.Error(i.logger).Log("msg", "ingress informer unable to sync cache") + if ctx.Err() != context.Canceled { + level.Error(i.logger).Log("msg", "ingress informer unable to sync cache") + } return } @@ -142,7 +144,8 @@ const ( ) func ingressLabels(ingress *v1beta1.Ingress) model.LabelSet { - ls := make(model.LabelSet, len(ingress.Labels)+len(ingress.Annotations)+2) + // Each label and annotation will create two key-value pairs in the map. + ls := make(model.LabelSet, 2*(len(ingress.Labels)+len(ingress.Annotations))+2) ls[ingressNameLabel] = lv(ingress.Name) ls[namespaceLabel] = lv(ingress.Namespace) diff --git a/discovery/kubernetes/node.go b/discovery/kubernetes/node.go index 973be2809ba..08c933b3895 100644 --- a/discovery/kubernetes/node.go +++ b/discovery/kubernetes/node.go @@ -79,7 +79,9 @@ func (n *Node) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer n.queue.ShutDown() if !cache.WaitForCacheSync(ctx.Done(), n.informer.HasSynced) { - level.Error(n.logger).Log("msg", "node informer unable to sync cache") + if ctx.Err() != context.Canceled { + level.Error(n.logger).Log("msg", "node informer unable to sync cache") + } return } @@ -149,7 +151,8 @@ const ( ) func nodeLabels(n *apiv1.Node) model.LabelSet { - ls := make(model.LabelSet, len(n.Labels)+len(n.Annotations)+1) + // Each label and annotation will create two key-value pairs in the map. 
+ ls := make(model.LabelSet, 2*(len(n.Labels)+len(n.Annotations))+1) ls[nodeNameLabel] = lv(n.Name) diff --git a/discovery/kubernetes/pod.go b/discovery/kubernetes/pod.go index 4f522e96a79..baf58d24bb1 100644 --- a/discovery/kubernetes/pod.go +++ b/discovery/kubernetes/pod.go @@ -82,7 +82,9 @@ func (p *Pod) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer p.queue.ShutDown() if !cache.WaitForCacheSync(ctx.Done(), p.informer.HasSynced) { - level.Error(p.logger).Log("msg", "pod informer unable to sync cache") + if ctx.Err() != context.Canceled { + level.Error(p.logger).Log("msg", "pod informer unable to sync cache") + } return } diff --git a/discovery/kubernetes/service.go b/discovery/kubernetes/service.go index 25471558cce..ca01a5b38c1 100644 --- a/discovery/kubernetes/service.go +++ b/discovery/kubernetes/service.go @@ -75,7 +75,9 @@ func (s *Service) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer s.queue.ShutDown() if !cache.WaitForCacheSync(ctx.Done(), s.informer.HasSynced) { - level.Error(s.logger).Log("msg", "service informer unable to sync cache") + if ctx.Err() != context.Canceled { + level.Error(s.logger).Log("msg", "service informer unable to sync cache") + } return } @@ -147,7 +149,8 @@ const ( ) func serviceLabels(svc *apiv1.Service) model.LabelSet { - ls := make(model.LabelSet, len(svc.Labels)+len(svc.Annotations)+2) + // Each label and annotation will create two key-value pairs in the map. 
+ ls := make(model.LabelSet, 2*(len(svc.Labels)+len(svc.Annotations))+2) ls[serviceNameLabel] = lv(svc.Name) ls[namespaceLabel] = lv(svc.Namespace) diff --git a/discovery/manager_test.go b/discovery/manager_test.go index 9e5b229fb49..72adfbaca26 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -25,11 +25,13 @@ import ( "time" "github.com/go-kit/kit/log" - "github.com/prometheus/client_golang/prometheus" - dto "github.com/prometheus/client_model/go" + "github.com/prometheus/client_golang/prometheus/testutil" + common_config "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" sd_config "github.com/prometheus/prometheus/discovery/config" + "github.com/prometheus/prometheus/discovery/consul" + "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/targetgroup" "gopkg.in/yaml.v2" ) @@ -749,27 +751,33 @@ func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Grou } func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) { - cfg := &config.Config{} - - sOne := ` -scrape_configs: - - job_name: 'prometheus' - static_configs: - - targets: ["foo:9090"] - - targets: ["bar:9090"] -` - if err := yaml.UnmarshalStrict([]byte(sOne), cfg); err != nil { - t.Fatalf("Unable to load YAML config sOne: %s", err) - } ctx, cancel := context.WithCancel(context.Background()) defer cancel() discoveryManager := NewManager(ctx, log.NewNopLogger()) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() - c := make(map[string]sd_config.ServiceDiscoveryConfig) - for _, v := range cfg.ScrapeConfigs { - c[v.JobName] = v.ServiceDiscoveryConfig + c := map[string]sd_config.ServiceDiscoveryConfig{ + "prometheus": sd_config.ServiceDiscoveryConfig{ + StaticConfigs: []*targetgroup.Group{ + &targetgroup.Group{ + Source: "0", + Targets: []model.LabelSet{ + model.LabelSet{ + model.AddressLabel: model.LabelValue("foo:9090"), + }, + }, + 
}, + &targetgroup.Group{ + Source: "1", + Targets: []model.LabelSet{ + model.LabelSet{ + model.AddressLabel: model.LabelValue("bar:9090"), + }, + }, + }, + }, + }, } discoveryManager.ApplyConfig(c) @@ -777,18 +785,17 @@ scrape_configs: verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"foo:9090\"}", true) verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"bar:9090\"}", true) - sTwo := ` -scrape_configs: - - job_name: 'prometheus' - static_configs: - - targets: ["foo:9090"] -` - if err := yaml.UnmarshalStrict([]byte(sTwo), cfg); err != nil { - t.Fatalf("Unable to load YAML config sTwo: %s", err) - } - c = make(map[string]sd_config.ServiceDiscoveryConfig) - for _, v := range cfg.ScrapeConfigs { - c[v.JobName] = v.ServiceDiscoveryConfig + c["prometheus"] = sd_config.ServiceDiscoveryConfig{ + StaticConfigs: []*targetgroup.Group{ + &targetgroup.Group{ + Source: "0", + Targets: []model.LabelSet{ + model.LabelSet{ + model.AddressLabel: model.LabelValue("foo:9090"), + }, + }, + }, + }, } discoveryManager.ApplyConfig(c) @@ -801,43 +808,33 @@ scrape_configs: // removing all targets from the static_configs sends an update with empty targetGroups. // This is required to signal the receiver that this target set has no current targets. 
func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { - cfg := &config.Config{} - - sOne := ` -scrape_configs: - - job_name: 'prometheus' - static_configs: - - targets: ["foo:9090"] -` - if err := yaml.UnmarshalStrict([]byte(sOne), cfg); err != nil { - t.Fatalf("Unable to load YAML config sOne: %s", err) - } ctx, cancel := context.WithCancel(context.Background()) defer cancel() discoveryManager := NewManager(ctx, log.NewNopLogger()) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() - c := make(map[string]sd_config.ServiceDiscoveryConfig) - for _, v := range cfg.ScrapeConfigs { - c[v.JobName] = v.ServiceDiscoveryConfig + c := map[string]sd_config.ServiceDiscoveryConfig{ + "prometheus": sd_config.ServiceDiscoveryConfig{ + StaticConfigs: []*targetgroup.Group{ + &targetgroup.Group{ + Source: "0", + Targets: []model.LabelSet{ + model.LabelSet{ + model.AddressLabel: model.LabelValue("foo:9090"), + }, + }, + }, + }, + }, } discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"foo:9090\"}", true) - sTwo := ` -scrape_configs: - - job_name: 'prometheus' - static_configs: -` - if err := yaml.UnmarshalStrict([]byte(sTwo), cfg); err != nil { - t.Fatalf("Unable to load YAML config sTwo: %s", err) - } - c = make(map[string]sd_config.ServiceDiscoveryConfig) - for _, v := range cfg.ScrapeConfigs { - c[v.JobName] = v.ServiceDiscoveryConfig + c["prometheus"] = sd_config.ServiceDiscoveryConfig{ + StaticConfigs: []*targetgroup.Group{}, } discoveryManager.ApplyConfig(c) @@ -876,30 +873,33 @@ func TestIdenticalConfigurationsAreCoalesced(t *testing.T) { } defer os.Remove(tmpFile2) - cfg := &config.Config{} - - sOne := ` -scrape_configs: - - job_name: 'prometheus' - file_sd_configs: - - files: ["%s"] - - job_name: 'prometheus2' - file_sd_configs: - - files: ["%s"] -` - sOne = fmt.Sprintf(sOne, tmpFile2, tmpFile2) - if err := 
yaml.UnmarshalStrict([]byte(sOne), cfg); err != nil { - t.Fatalf("Unable to load YAML config sOne: %s", err) - } ctx, cancel := context.WithCancel(context.Background()) defer cancel() discoveryManager := NewManager(ctx, nil) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() - c := make(map[string]sd_config.ServiceDiscoveryConfig) - for _, v := range cfg.ScrapeConfigs { - c[v.JobName] = v.ServiceDiscoveryConfig + c := map[string]sd_config.ServiceDiscoveryConfig{ + "prometheus": sd_config.ServiceDiscoveryConfig{ + FileSDConfigs: []*file.SDConfig{ + &file.SDConfig{ + Files: []string{ + tmpFile2, + }, + RefreshInterval: file.DefaultSDConfig.RefreshInterval, + }, + }, + }, + "prometheus2": sd_config.ServiceDiscoveryConfig{ + FileSDConfigs: []*file.SDConfig{ + &file.SDConfig{ + Files: []string{ + tmpFile2, + }, + RefreshInterval: file.DefaultSDConfig.RefreshInterval, + }, + }, + }, } discoveryManager.ApplyConfig(c) @@ -924,7 +924,6 @@ scrape_configs: if err := yaml.UnmarshalStrict([]byte(cfgText), originalConfig); err != nil { t.Fatalf("Unable to load YAML config cfgYaml: %s", err) } - origScrpCfg := originalConfig.ScrapeConfigs[0] processedConfig := &config.Config{} if err := yaml.UnmarshalStrict([]byte(cfgText), processedConfig); err != nil { @@ -936,100 +935,76 @@ scrape_configs: discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() - c := make(map[string]sd_config.ServiceDiscoveryConfig) - for _, v := range processedConfig.ScrapeConfigs { - c[v.JobName] = v.ServiceDiscoveryConfig + c := map[string]sd_config.ServiceDiscoveryConfig{ + "prometheus": processedConfig.ScrapeConfigs[0].ServiceDiscoveryConfig, } discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() + origSdcfg := originalConfig.ScrapeConfigs[0].ServiceDiscoveryConfig for _, sdcfg := range c { - if !reflect.DeepEqual(origScrpCfg.ServiceDiscoveryConfig.StaticConfigs, sdcfg.StaticConfigs) { + if !reflect.DeepEqual(origSdcfg.StaticConfigs, 
sdcfg.StaticConfigs) { t.Fatalf("discovery manager modified static config \n expected: %v\n got: %v\n", - origScrpCfg.ServiceDiscoveryConfig.StaticConfigs, sdcfg.StaticConfigs) + origSdcfg.StaticConfigs, sdcfg.StaticConfigs) } } } func TestGaugeFailedConfigs(t *testing.T) { - var ( - fcGauge prometheus.Gauge - err error - ) - - cfgOneText := ` -scrape_configs: -- job_name: prometheus - consul_sd_configs: - - server: "foo:8500" - tls_config: - cert_file: "/tmp/non_existent" - - server: "bar:8500" - tls_config: - cert_file: "/tmp/non_existent" - - server: "foo2:8500" - tls_config: - cert_file: "/tmp/non_existent" -` - cfgOne := &config.Config{} - - err = yaml.UnmarshalStrict([]byte(cfgOneText), cfgOne) - if err != nil { - t.Fatalf("Unable to load YAML config cfgOne: %s", err) - } ctx, cancel := context.WithCancel(context.Background()) defer cancel() discoveryManager := NewManager(ctx, log.NewNopLogger()) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() - c := make(map[string]sd_config.ServiceDiscoveryConfig) - for _, v := range cfgOne.ScrapeConfigs { - c[v.JobName] = v.ServiceDiscoveryConfig + c := map[string]sd_config.ServiceDiscoveryConfig{ + "prometheus": sd_config.ServiceDiscoveryConfig{ + ConsulSDConfigs: []*consul.SDConfig{ + &consul.SDConfig{ + Server: "foo:8500", + TLSConfig: common_config.TLSConfig{ + CertFile: "/tmp/non_existent", + }, + }, + &consul.SDConfig{ + Server: "bar:8500", + TLSConfig: common_config.TLSConfig{ + CertFile: "/tmp/non_existent", + }, + }, + &consul.SDConfig{ + Server: "foo2:8500", + TLSConfig: common_config.TLSConfig{ + CertFile: "/tmp/non_existent", + }, + }, + }, + }, } - discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() - metricOne := &dto.Metric{} - fcGauge, err = failedConfigs.GetMetricWithLabelValues(discoveryManager.name) - if err != nil { - t.Fatal(err) - } - - fcGauge.Write(metricOne) - - failedCount := metricOne.GetGauge().GetValue() + failedCount := testutil.ToFloat64(failedConfigs) if 
failedCount != 3 { t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount) } - cfgTwoText := ` -scrape_configs: - - job_name: 'prometheus' - static_configs: - - targets: ["foo:9090"] -` - cfgTwo := &config.Config{} - if err := yaml.UnmarshalStrict([]byte(cfgTwoText), cfgTwo); err != nil { - t.Fatalf("Unable to load YAML config cfgTwo: %s", err) - } - c = make(map[string]sd_config.ServiceDiscoveryConfig) - for _, v := range cfgTwo.ScrapeConfigs { - c[v.JobName] = v.ServiceDiscoveryConfig + c["prometheus"] = sd_config.ServiceDiscoveryConfig{ + StaticConfigs: []*targetgroup.Group{ + &targetgroup.Group{ + Source: "0", + Targets: []model.LabelSet{ + model.LabelSet{ + model.AddressLabel: "foo:9090", + }, + }, + }, + }, } - discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() - metricTwo := &dto.Metric{} - fcGauge, err = failedConfigs.GetMetricWithLabelValues(discoveryManager.name) - if err != nil { - t.Fatal(err) - } - fcGauge.Write(metricTwo) - - failedCount = metricTwo.GetGauge().GetValue() + failedCount = testutil.ToFloat64(failedConfigs) if failedCount != 0 { t.Fatalf("Expected to get no failed config, got: %v", failedCount) } diff --git a/discovery/refresh/refresh.go b/discovery/refresh/refresh.go index ebc99e2e044..c48524c508e 100644 --- a/discovery/refresh/refresh.go +++ b/discovery/refresh/refresh.go @@ -75,7 +75,9 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { // Get an initial set right away. 
tgs, err := d.refresh(ctx) if err != nil { - level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error()) + if ctx.Err() != context.Canceled { + level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error()) + } } else { select { case ch <- tgs: @@ -92,7 +94,9 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { case <-ticker.C: tgs, err := d.refresh(ctx) if err != nil { - level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error()) + if ctx.Err() != context.Canceled { + level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error()) + } continue } diff --git a/discovery/targetgroup/targetgroup_test.go b/discovery/targetgroup/targetgroup_test.go index 0087b272fd7..c4a588e0ffe 100644 --- a/discovery/targetgroup/targetgroup_test.go +++ b/discovery/targetgroup/targetgroup_test.go @@ -17,6 +17,9 @@ import ( "errors" "testing" + "github.com/prometheus/common/model" + "gopkg.in/yaml.v2" + "github.com/prometheus/prometheus/util/testutil" ) @@ -24,10 +27,19 @@ func TestTargetGroupStrictJsonUnmarshal(t *testing.T) { tests := []struct { json string expectedReply error + expectedGroup Group }{ { json: ` {"labels": {},"targets": []}`, expectedReply: nil, + expectedGroup: Group{Targets: []model.LabelSet{}, Labels: model.LabelSet{}}, + }, + { + json: ` {"labels": {"my":"label"},"targets": ["localhost:9090","localhost:9091"]}`, + expectedReply: nil, + expectedGroup: Group{Targets: []model.LabelSet{ + model.LabelSet{"__address__": "localhost:9090"}, + model.LabelSet{"__address__": "localhost:9091"}}, Labels: model.LabelSet{"my": "label"}}, }, { json: ` {"label": {},"targets": []}`, @@ -38,11 +50,108 @@ func TestTargetGroupStrictJsonUnmarshal(t *testing.T) { expectedReply: errors.New("json: unknown field \"target\""), }, } - tg := Group{} for _, test := range tests { + tg := Group{} actual := tg.UnmarshalJSON([]byte(test.json)) testutil.Equals(t, 
test.expectedReply, actual) + testutil.Equals(t, test.expectedGroup, tg) + } + +} + +func TestTargetGroupYamlMarshal(t *testing.T) { + marshal := func(g interface{}) []byte { + d, err := yaml.Marshal(g) + if err != nil { + panic(err) + } + return d + } + + tests := []struct { + expectedYaml string + expectetedErr error + group Group + }{ + { + // labels should be omitted if empty. + group: Group{}, + expectedYaml: "targets: []\n", + expectetedErr: nil, + }, + { + // targets only exposes addresses. + group: Group{Targets: []model.LabelSet{ + model.LabelSet{"__address__": "localhost:9090"}, + model.LabelSet{"__address__": "localhost:9091"}}, + Labels: model.LabelSet{"foo": "bar", "bar": "baz"}}, + expectedYaml: "targets:\n- localhost:9090\n- localhost:9091\nlabels:\n bar: baz\n foo: bar\n", + expectetedErr: nil, + }, + } + + for _, test := range tests { + actual, err := test.group.MarshalYAML() + testutil.Equals(t, test.expectetedErr, err) + testutil.Equals(t, test.expectedYaml, string(marshal(actual))) + } +} + +func TestTargetGroupYamlUnmarshal(t *testing.T) { + unmarshal := func(d []byte) func(interface{}) error { + return func(o interface{}) error { + return yaml.Unmarshal(d, o) + } + } + tests := []struct { + yaml string + expectedGroup Group + expectedReply error + }{ + { + // empty target group. + yaml: "labels:\ntargets:\n", + expectedGroup: Group{Targets: []model.LabelSet{}}, + expectedReply: nil, + }, + { + // brackets syntax. + yaml: "labels:\n my: label\ntargets:\n ['localhost:9090', 'localhost:9191']", + expectedReply: nil, + expectedGroup: Group{Targets: []model.LabelSet{ + model.LabelSet{"__address__": "localhost:9090"}, + model.LabelSet{"__address__": "localhost:9191"}}, Labels: model.LabelSet{"my": "label"}}, + }, + { + // incorrect syntax. 
+ yaml: "labels:\ntargets:\n 'localhost:9090'", + expectedReply: &yaml.TypeError{Errors: []string{"line 3: cannot unmarshal !!str `localho...` into []string"}}, + }, + } + + for _, test := range tests { + tg := Group{} + actual := tg.UnmarshalYAML(unmarshal([]byte(test.yaml))) + testutil.Equals(t, test.expectedReply, actual) + testutil.Equals(t, test.expectedGroup, tg) } } + +func TestString(t *testing.T) { + // String() should return only the source, regardless of other attributes. + group1 := + Group{Targets: []model.LabelSet{ + model.LabelSet{"__address__": "localhost:9090"}, + model.LabelSet{"__address__": "localhost:9091"}}, + Source: "", + Labels: model.LabelSet{"foo": "bar", "bar": "baz"}} + group2 := + Group{Targets: []model.LabelSet{}, + Source: "", + Labels: model.LabelSet{}} + testutil.Equals(t, "", group1.String()) + testutil.Equals(t, "", group2.String()) + testutil.Equals(t, group1.String(), group2.String()) +} diff --git a/docs/configuration/alerting_rules.md b/docs/configuration/alerting_rules.md index ea112b1e200..c9c81bb9370 100644 --- a/docs/configuration/alerting_rules.md +++ b/docs/configuration/alerting_rules.md @@ -101,5 +101,5 @@ on top of the simple alert definitions. In Prometheus's ecosystem, the role. Thus, Prometheus may be configured to periodically send information about alert states to an Alertmanager instance, which then takes care of dispatching the right notifications. -Prometheus can be [configured](configuration.md) to automatically discovered available +Prometheus can be [configured](configuration.md) to automatically discover available Alertmanager instances through its service discovery integrations. diff --git a/docs/installation.md b/docs/installation.md index a4210842bb9..0c62b46702d 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -39,15 +39,19 @@ two examples. 
Bind-mount your `prometheus.yml` from the host by running: ```bash -docker run -p 9090:9090 -v /tmp/prometheus.yml:/etc/prometheus/prometheus.yml \ - prom/prometheus +docker run \ + -p 9090:9090 \ + -v /tmp/prometheus.yml:/etc/prometheus/prometheus.yml \ + prom/prometheus ``` Or use an additional volume for the config: ```bash -docker run -p 9090:9090 -v /prometheus-data \ - prom/prometheus --config.file=/prometheus-data/prometheus.yml +docker run \ + -p 9090:9090 \ + -v /path/to/config:/etc/prometheus \ + prom/prometheus ``` ### Custom image diff --git a/docs/management_api.md b/docs/management_api.md index 584aa40a427..a525076cfdd 100644 --- a/docs/management_api.md +++ b/docs/management_api.md @@ -35,7 +35,7 @@ POST /-/reload This endpoint triggers a reload of the Prometheus configuration and rule files. It's disabled by default and can be enabled via the `--web.enable-lifecycle` flag. -An alternative way trigger a configuration reload is by sending a `SIGHUP` to the Prometheus process. +Alternatively, a configuration reload can be triggered by sending a `SIGHUP` to the Prometheus process. ### Quit @@ -47,4 +47,4 @@ POST /-/quit This endpoint triggers a graceful shutdown of Prometheus. It's disabled by default and can be enabled via the `--web.enable-lifecycle` flag. -An alternative way trigger a graceful shutdown is by sending a `SIGTERM` to the Prometheus process. +Alternatively, a graceful shutdown can be triggered by sending a `SIGTERM` to the Prometheus process. diff --git a/docs/querying/api.md b/docs/querying/api.md index 81d06d1b9e2..18da0bd294a 100644 --- a/docs/querying/api.md +++ b/docs/querying/api.md @@ -679,7 +679,7 @@ The following endpoint returns flag values that Prometheus was configured with: GET /api/v1/status/flags ``` -All values are in a form of "string". +All values are of the result type `string`. 
```json $ curl http://localhost:9090/api/v1/status/flags @@ -698,6 +698,70 @@ $ curl http://localhost:9090/api/v1/status/flags *New in v2.2* +### Runtime Information + +The following endpoint returns various runtime information properties about the Prometheus server: + +``` +GET /api/v1/status/runtimeinfo +``` + +The returned values are of different types, depending on the nature of the runtime property. + +```json +$ curl http://localhost:9090/api/v1/status/runtimeinfo +{ + "status": "success", + "data": { + "startTime": "2019-11-02T17:23:59.301361365+01:00", + "CWD": "/", + "reloadConfigSuccess": true, + "lastConfigTime": "2019-11-02T17:23:59+01:00", + "chunkCount": 873, + "timeSeriesCount": 873, + "corruptionCount": 0, + "goroutineCount": 48, + "GOMAXPROCS": 4, + "GOGC": "", + "GODEBUG": "", + "storageRetention": "15d" + } +} +``` + +**NOTE**: The exact returned runtime properties may change without notice between Prometheus versions. + +*New in v2.14* + +### Build Information + +The following endpoint returns various build information properties about the Prometheus server: + +``` +GET /api/v1/status/buildinfo +``` + +All values are of the result type `string`. + +```json +$ curl http://localhost:9090/api/v1/status/buildinfo +{ + "status": "success", + "data": { + "version": "2.13.1", + "revision": "cb7cbad5f9a2823a622aaa668833ca04f50a0ea7", + "branch": "master", + "buildUser": "julius@desktop", + "buildDate": "20191102-16:19:59", + "goVersion": "go1.13.1" + } +} +``` + +**NOTE**: The exact returned build properties may change without notice between Prometheus versions. + +*New in v2.14* + ## TSDB Admin APIs These are APIs that expose database functionalities for the advanced user. These APIs are not enabled unless the `--web.enable-admin-api` is set. 
diff --git a/docs/querying/basics.md b/docs/querying/basics.md index 65748d9f0f7..9ac37c04121 100644 --- a/docs/querying/basics.md +++ b/docs/querying/basics.md @@ -188,6 +188,12 @@ in detail in the [expression language operators](operators.md) page. Prometheus supports several functions to operate on data. These are described in detail in the [expression language functions](functions.md) page. +## Comments + +PromQL supports line comments that start with `#`. Example: + + # This is a comment + ## Gotchas ### Staleness diff --git a/docs/querying/examples.md b/docs/querying/examples.md index 3bb2bbc6800..28fa49fe946 100644 --- a/docs/querying/examples.md +++ b/docs/querying/examples.md @@ -59,7 +59,9 @@ Assuming that the `http_requests_total` time series all have the labels `job` want to sum over the rate of all instances, so we get fewer output time series, but still preserve the `job` dimension: - sum(rate(http_requests_total[5m])) by (job) + sum by (job) ( + rate(http_requests_total[5m]) + ) If we have two different metrics with the same dimensional labels, we can apply binary operators to them and elements on both sides with the same label set @@ -71,9 +73,9 @@ scheduler exposing these metrics about the instances it runs): The same expression, but summed by application, could be written like this: - sum( + sum by (app, proc) ( instance_memory_limit_bytes - instance_memory_usage_bytes - ) by (app, proc) / 1024 / 1024 + ) / 1024 / 1024 If the same fictional cluster scheduler exposed CPU usage metrics like the following for every instance: @@ -87,9 +89,9 @@ following for every instance: ...we could get the top 3 CPU users grouped by application (`app`) and process type (`proc`) like this: - topk(3, sum(rate(instance_cpu_time_ns[5m])) by (app, proc)) + topk(3, sum by (app, proc) (rate(instance_cpu_time_ns[5m]))) Assuming this metric contains one time series per running instance, you could count the number of running instances per application like this: - 
count(instance_cpu_time_ns) by (app) + count by (app) (instance_cpu_time_ns) diff --git a/docs/querying/operators.md b/docs/querying/operators.md index 7f92b3622da..99143ec8a11 100644 --- a/docs/querying/operators.md +++ b/docs/querying/operators.md @@ -196,7 +196,12 @@ vector of fewer elements with aggregated values: * `quantile` (calculate φ-quantile (0 ≤ φ ≤ 1) over dimensions) These operators can either be used to aggregate over **all** label dimensions -or preserve distinct dimensions by including a `without` or `by` clause. +or preserve distinct dimensions by including a `without` or `by` clause. These +clauses may be used before or after the expression. + + [without|by (