diff --git a/config/config-observability.yaml b/config/config-observability.yaml index 4001341a1d8..7c883908485 100644 --- a/config/config-observability.yaml +++ b/config/config-observability.yaml @@ -34,98 +34,6 @@ data: # this example block and unindented to be in the data block # to actually change the configuration. - # logging.enable-var-log-collection defaults to false. - # A fluentd sidecar will be set up to collect var log if - # this flag is true. - logging.enable-var-log-collection: false - - # logging.fluentd-sidecar-image provides the fluentd sidecar image - # to inject as a sidecar to collect logs from /var/log. - # Must be presented if logging.enable-var-log-collection is true. - logging.fluentd-sidecar-image: k8s.gcr.io/fluentd-elasticsearch:v2.0.4 - - # logging.fluentd-sidecar-output-config provides the configuration - # for the fluentd sidecar, which will be placed into a configmap and - # mounted into the fluentd sidecar image. - logging.fluentd-sidecar-output-config: | - # Parse json log before sending to Elastic Search - - @type parser - key_name log - - @type multi_format - - format json - time_key fluentd-time # fluentd-time is reserved for structured logs - time_format %Y-%m-%dT%H:%M:%S.%NZ - - - format none - message_key log - - - - # Send to Elastic Search - - @id elasticsearch - @type elasticsearch - @log_level info - include_tag_key true - # Elasticsearch service is in monitoring namespace. - host elasticsearch-logging.knative-monitoring - port 9200 - logstash_format true - - @type file - path /var/log/fluentd-buffers/kubernetes.system.buffer - flush_mode interval - retry_type exponential_backoff - flush_thread_count 2 - flush_interval 5s - retry_forever - retry_max_interval 30 - chunk_limit_size 2M - queue_limit_length 8 - overflow_action block - - - - # logging.revision-url-template provides a template to use for producing the - # logging URL that is injected into the status of each Revision. 
- # This value is what you might use the the Knative monitoring bundle, and provides - # access to Kibana after setting up kubectl proxy. - logging.revision-url-template: | - http://localhost:8001/api/v1/namespaces/knative-monitoring/services/kibana-logging/proxy/app/kibana#/discover?_a=(query:(match:(kubernetes.labels.knative-dev%2FrevisionUID:(query:'${REVISION_UID}',type:phrase)))) - - # If non-empty, this enables queue proxy writing request logs to stdout. - # The value determines the shape of the request logs and it must be a valid go text/template. - # It is important to keep this as a single line. Multiple lines are parsed as separate entities - # by most collection agents and will split the request logs into multiple records. - # - # The following fields and functions are available to the template: - # - # Request: An http.Request (see https://golang.org/pkg/net/http/#Request) - # representing an HTTP request received by the server. - # - # Response: - # struct { - # Code int // HTTP status code (see https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml) - # Size int // An int representing the size of the response. - # Latency float64 // A float64 representing the latency of the response in seconds. 
- # } - # - # Revision: - # struct { - # Name string // Knative revision name - # Namespace string // Knative revision namespace - # Service string // Knative service name - # Configuration string // Knative configuration name - # PodName string // Name of the pod hosting the revision - # PodIP string // IP of the pod hosting the revision - # } - # - logging.request-log-template: '{"httpRequest": {"requestMethod": "{{.Request.Method}}", "requestUrl": "{{js .Request.RequestURI}}", "requestSize": "{{.Request.ContentLength}}", "status": {{.Response.Code}}, "responseSize": "{{.Response.Size}}", "userAgent": "{{js .Request.UserAgent}}", "remoteIp": "{{js .Request.RemoteAddr}}", "serverIp": "{{.Revision.PodIP}}", "referer": "{{js .Request.Referer}}", "latency": "{{.Response.Latency}}s", "protocol": "{{.Request.Proto}}"}, "traceId": "{{index .Request.Header "X-B3-Traceid"}}"}' - # metrics.backend-destination field specifies the system metrics destination. # It supports either prometheus (the default) or stackdriver. # Note: Using stackdriver will incur additional charges @@ -143,7 +51,7 @@ data: # metrics.allow-stackdriver-custom-metrics indicates whether it is allowed to send metrics to # Stackdriver using "global" resource type and custom metric type if the - # metrics are not supported by "knative_revision" resource type. Setting this - # flag to "true" could cause extra Stackdriver charge. + # metrics are not supported by "knative_broker", "knative_trigger", and "knative_source" resource types. + # Setting this flag to "true" could cause extra Stackdriver charge. # If metrics.backend-destination is not Stackdriver, this is ignored. metrics.allow-stackdriver-custom-metrics: "false" diff --git a/docs/metrics.md b/docs/metrics.md index 4d605800c1f..121157f3b6e 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -1,6 +1,6 @@ # Metrics -This is a list of metrics exported by Knative Eventing components. 
+This is a list of data-plane metrics exported by Knative Eventing components. ## Broker @@ -8,8 +8,8 @@ These are exported by `broker-ingress` pods. | Name | Type | Description | Tags | | ---------------------- | --------- | -------------------------- | ------------------ | -| `broker_events_total` | count | Number of events received. | `result`, `broker` | -| `broker_dispatch_time` | histogram | Time to dispatch an event. | `result`, `broker` | +| `event_count` | count | Number of events received by a Broker. | `namespace_name`, `broker_name`, `event_source`, `event_type`, `response_code`, `response_code_class` | +| `event_dispatch_latencies` | histogram | The time spent dispatching an event to a Channel. | `namespace_name`, `broker_name`, `event_source`, `event_type`, `response_code`, `response_code_class` | ## Trigger @@ -17,22 +17,38 @@ These are exported by `broker-filter` pods. | Name | Type | Description | Tags | | ---------------------------------- | --------- | ---------------------------------------------------- | ---------------------------------------------- | -| `trigger_events_total` | count | Number of events received. | `result`, `broker`, `trigger` | -| `trigger_dispatch_time` | histogram | Time to dispatch an event. | `result`, `broker`, `trigger` | -| `trigger_filter_time` | histogram | Time to filter an event. 
| `result`, `broker`, `trigger`, `filter_result` | -| `broker_to_function_delivery_time` | histogram | Time from ingress of an event until it is dispatched | `result`, `broker`, `trigger` | +| `event_count` | count | Number of events received by a Trigger | `namespace_name`, `trigger_name`, `broker_name`, `filter_source`, `filter_type`, `response_code`, `response_code_class` | +| `event_dispatch_latencies` | histogram | The time spent dispatching an event to a Trigger subscriber | `namespace_name`, `trigger_name`, `broker_name`, `filter_source`, `filter_type`, `response_code`, `response_code_class` | +| `event_processing_latencies` | histogram | The time spent processing an event before it is dispatched to a Trigger subscriber | `namespace_name`, `trigger_name`, `broker_name`, `filter_source`, `filter_type` | -## Access metrics +## Sources + +These are exported by core sources. + +### ApiServerSource + +| Name | Type | Description | Tags | +| ---------------------------------- | --------- | ---------------------------------------------------- | ---------------------------------------------- | +| `event_count` | count | Number of events sent | `namespace_name`, `source_name`, `source_resource_group`, `event_source`, `event_type`, `response_code`, `response_code_class` | + +### CronJobSource + +| Name | Type | Description | Tags | +| ---------------------------------- | --------- | ---------------------------------------------------- | ---------------------------------------------- | +| `event_count` | count | Number of events sent | `namespace_name`, `source_name`, `source_resource_group`, `event_source`, `event_type`, `response_code`, `response_code_class` | + + +# Access metrics + +## Prometheus Collection Accessing metrics requires Prometheus and Grafana installed. Follow the [instructions to install Prometheus and Grafana](https://github.com/knative/docs/blob/master/docs/serving/installing-logging-metrics-traces.md) in namespace `knative-monitoring`. 
-## Prometheus Collection - > _All commands assume root of repo._ -1. Enable Knatives install of Prometheus to scrape Knative with GCP, run the +1. Enable Knative's install of Prometheus to scrape Knative Eventing, run the following: ```shell @@ -90,7 +106,7 @@ in namespace `knative-monitoring`. #### Remove Scrape Config -Remove the text related to Cloud Run Events from `prometheus-scrape-config`, +Remove the text related to Knative Eventing from `prometheus-scrape-config`, ```shell kubectl edit configmap -n knative-monitoring prometheus-scrape-config @@ -118,3 +134,27 @@ Follow the [instructions to open Grafana dashboard](https://github.com/knative/docs/blob/master/docs/serving/accessing-metrics.md#grafana), then you will access the metrics at [http://localhost:3000](http://localhost:3000). + + +## Stackdriver Collection + +1. Install Knative Stackdriver components by running the following command from + the root directory of [knative/serving](https://github.com/knative/serving) + repository: + + ```shell + kubectl apply --recursive --filename config/monitoring/100-namespace.yaml \ + --filename config/monitoring/metrics/stackdriver + ``` + +1. Run the following command to set up Stackdriver as the metrics backend: + + ``` + kubectl edit cm -n knative-eventing config-observability + ``` + + Add `metrics.backend-destination: stackdriver` and `metrics.allow-stackdriver-custom-metrics: "true"` + to the `data` field. You can find detailed information in the `data._example` field in the + `ConfigMap` you are editing. +1. Open the Stackdriver UI and see your resource metrics in the Stackdriver Metrics Explorer. + You should be able to see metrics with the prefix `custom.googleapis.com/knative.dev/`.