diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index b02b7ecfbfb1..379f0c9548a2 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -83,7 +83,8 @@ To check out this repository: 1. Create your own [fork of this repo](https://help.github.com/articles/fork-a-repo/) -2. Clone it to your machine: +1. Clone it to your machine: + ```shell mkdir -p ${GOPATH}/src/github.com/knative cd ${GOPATH}/src/github.com/knative @@ -156,6 +157,7 @@ ko apply -f config/ ``` You can see things running with: + ```shell kubectl -n knative-serving get pods NAME READY STATUS RESTARTS AGE @@ -173,35 +175,19 @@ If you're using a GCP project to host your Kubernetes cluster, it's good to chec [Discovery & load balancing](http://console.developers.google.com/kubernetes/discovery) page to ensure that all services are up and running (and not blocked by a quota issue, for example). -### Enable log and metric collection +### Install logging and monitoring backends -You can use two different setups for collecting logs(to Elasticsearch&Kibana) and metrics -(See [Logs and Metrics](./docs/telemetry.md) for setting up other logging backend): - -1. **150-elasticsearch-prod**: This configuration collects logs & metrics from user containers, build controller and Istio requests. +Run: ```shell kubectl apply -R -f config/monitoring/100-common \ - -f config/monitoring/150-elasticsearch-prod \ + -f config/monitoring/150-elasticsearch \ -f third_party/config/monitoring/common \ -f third_party/config/monitoring/elasticsearch \ -f config/monitoring/200-common \ -f config/monitoring/200-common/100-istio.yaml ``` -1. **150-elasticsearch-dev**: This configuration collects everything in (1) plus Knative Serving controller logs. - -```shell -kubectl apply -R -f config/monitoring/100-common \ - -f config/monitoring/150-elasticsearch-dev \ - -f third_party/config/monitoring/common \ - -f third_party/config/monitoring/elasticsearch \ - -f config/monitoring/200-common \ - -f config/monitoring/200-common/100-istio.yaml -``` - -Once complete, follow the instructions at [Logs and Metrics](./docs/telemetry.md). - ## Iterating As you make changes to the code-base, there are two special cases to be aware of: @@ -213,6 +199,7 @@ As you make changes to the code-base, there are two special cases to be aware of These are both idempotent, and we expect that running these at `HEAD` to have no diffs. Once the codegen and dependency information is correct, redeploying the controller is simply: + ```shell ko apply -f config/controller.yaml ``` @@ -223,6 +210,7 @@ redeploy `Knative Serving`](./README.md#start-knative). 
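After `ko apply -f config/controller.yaml`, it can help to confirm that the new controller image actually rolled out. A minimal sketch, assuming the controller Deployment in `knative-serving` carries an `app: controller` label (check `config/controller.yaml` for the labels actually in use):

```shell
# Watch the controller pods cycle as the new image rolls out.
kubectl -n knative-serving get pods -l app=controller -w

# Tail the logs of the replacement pod to confirm your change is live.
kubectl -n knative-serving logs -l app=controller --tail=50 -f
```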
## Clean up You can delete all of the service components with: + ```shell ko delete --ignore-not-found=true \ -f config/monitoring/100-common \ diff --git a/cmd/activator/main.go b/cmd/activator/main.go index 192f16b8712b..0a600e1f6e9c 100644 --- a/cmd/activator/main.go +++ b/cmd/activator/main.go @@ -28,6 +28,8 @@ import ( "net/url" "time" + "github.com/knative/serving/pkg/logging/logkey" + "github.com/knative/serving/pkg/activator" clientset "github.com/knative/serving/pkg/client/clientset/versioned" "github.com/knative/serving/pkg/configmap" @@ -35,6 +37,7 @@ import ( h2cutil "github.com/knative/serving/pkg/h2c" "github.com/knative/serving/pkg/logging" "github.com/knative/serving/pkg/signals" + "github.com/knative/serving/pkg/system" "github.com/knative/serving/third_party/h2c" "go.uber.org/zap" "k8s.io/client-go/kubernetes" @@ -45,6 +48,7 @@ const ( maxUploadBytes = 32e6 // 32MB - same as app engine maxRetry = 60 retryInterval = 1 * time.Second + logLevelKey = "activator" ) type activationHandler struct { @@ -127,7 +131,7 @@ func (a *activationHandler) handler(w http.ResponseWriter, r *http.Request) { endpoint, status, err := a.act.ActiveEndpoint(namespace, name) if err != nil { msg := fmt.Sprintf("Error getting active endpoint: %v", err) - a.logger.Errorf(msg) + a.logger.Error(msg) http.Error(w, msg, int(status)) return } @@ -157,9 +161,9 @@ func main() { if err != nil { log.Fatalf("Error parsing logging configuration: %v", err) } - logger, _ := logging.NewLoggerFromConfig(config, "activator") + logger, atomicLevel := logging.NewLoggerFromConfig(config, logLevelKey) defer logger.Sync() - + logger = logger.With(zap.String(logkey.ControllerType, "activator")) logger.Info("Starting the knative activator") clusterConfig, err := rest.InClusterConfig() @@ -186,6 +190,13 @@ func main() { a.Shutdown() }() + // Watch the logging config map and dynamically update logging levels. + configMapWatcher := configmap.NewDefaultWatcher(kubeClient, system.Namespace) + configMapWatcher.Watch(logging.ConfigName, logging.UpdateLevelFromConfigMap(logger, atomicLevel, logLevelKey)) + if err = configMapWatcher.Start(stopCh); err != nil { + logger.Fatalf("failed to start configuration manager: %v", err) + } + http.HandleFunc("/", ah.handler) h2c.ListenAndServe(":8080", nil) } diff --git a/cmd/autoscaler/main.go b/cmd/autoscaler/main.go index e925b1d3bc31..178839794710 100644 --- a/cmd/autoscaler/main.go +++ b/cmd/autoscaler/main.go @@ -36,6 +36,7 @@ import ( "github.com/knative/serving/pkg/configmap" "github.com/knative/serving/pkg/logging" "github.com/knative/serving/pkg/logging/logkey" + "github.com/knative/serving/pkg/system" "github.com/gorilla/websocket" @@ -54,6 +55,7 @@ const ( // seconds while an http request is taking the full timeout of 5 // second. 
scaleBufferSize = 10 + logLevelKey = "autoscaler" ) var ( @@ -70,6 +72,7 @@ var ( servingAutoscalerPort string currentScale int32 logger *zap.SugaredLogger + atomicLevel zap.AtomicLevel // Revision-level configuration concurrencyModel = flag.String("concurrencyModel", string(v1alpha1.RevisionRequestConcurrencyModelMulti), "") @@ -227,11 +230,12 @@ func main() { if err != nil { log.Fatalf("Error parsing logging configuration: %v", err) } - logger, _ = logging.NewLoggerFromConfig(logginConfig, "autoscaler") + logger, atomicLevel = logging.NewLoggerFromConfig(logginConfig, logLevelKey) defer logger.Sync() initEnv() logger = logger.With( + zap.String(logkey.ControllerType, "autoscaler"), zap.String(logkey.Namespace, servingNamespace), zap.String(logkey.Configuration, servingConfig), zap.String(logkey.Revision, servingRevision)) @@ -266,6 +270,15 @@ func main() { } statsReporter = reporter + // Watch the logging config map and dynamically update logging levels. + stopCh := make(chan struct{}) + defer close(stopCh) + configMapWatcher := configmap.NewDefaultWatcher(kubeClient, system.Namespace) + configMapWatcher.Watch(logging.ConfigName, logging.UpdateLevelFromConfigMap(logger, atomicLevel, logLevelKey)) + if err := configMapWatcher.Start(stopCh); err != nil { + logger.Fatalf("failed to start configuration manager: %v", err) + } + go runAutoscaler() go scaleSerializer() diff --git a/cmd/controller/main.go b/cmd/controller/main.go index a322ca50a38f..966b4d741446 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -22,13 +22,11 @@ import ( "time" "github.com/knative/serving/pkg/configmap" - "go.uber.org/zap" "github.com/knative/serving/pkg/controller" "github.com/knative/serving/pkg/logging" "github.com/knative/serving/pkg/system" - corev1 "k8s.io/api/core/v1" vpa "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned" vpainformers "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/informers/externalversions" @@ -52,6 +50,7 @@ import ( const ( threadsPerController = 2 + logLevelKey = "controller" ) var ( @@ -69,7 +68,7 @@ func main() { if err != nil { log.Fatalf("Error parsing logging configuration: %v", err) } - logger, atomicLevel := logging.NewLoggerFromConfig(loggingConfig, "controller") + logger, atomicLevel := logging.NewLoggerFromConfig(loggingConfig, logLevelKey) defer logger.Sync() // set up signals so we handle the first shutdown signal gracefully @@ -164,7 +163,7 @@ func main() { } // Watch the logging config map and dynamically update logging levels. - configMapWatcher.Watch(logging.ConfigName, receiveLoggingConfig(logger, atomicLevel)) + configMapWatcher.Watch(logging.ConfigName, logging.UpdateLevelFromConfigMap(logger, atomicLevel, logLevelKey)) // These are non-blocking. kubeInformerFactory.Start(stopCh) @@ -213,20 +212,3 @@ func init() { flag.StringVar(&kubeconfig, "kubeconfig", "", "Path to a kubeconfig. Only required if out-of-cluster.") flag.StringVar(&masterURL, "master", "", "The address of the Kubernetes API server. Overrides any value in kubeconfig. Only required if out-of-cluster.") } - -func receiveLoggingConfig(logger *zap.SugaredLogger, atomicLevel zap.AtomicLevel) func(configMap *corev1.ConfigMap) { - return func(configMap *corev1.ConfigMap) { - loggingConfig, err := logging.NewConfigFromConfigMap(configMap) - if err != nil { - logger.Error("Failed to parse the logging configmap. 
Previous config map will be used.", zap.Error(err)) - return - } - - if level, ok := loggingConfig.LoggingLevel["controller"]; ok { - if atomicLevel.Level() != level { - logger.Infof("Updating logging level from %v to %v.", atomicLevel.Level(), level) - atomicLevel.SetLevel(level) - } - } - } -} diff --git a/cmd/controller/main_test.go b/cmd/controller/main_test.go deleted file mode 100644 index de6866b8cfcd..000000000000 --- a/cmd/controller/main_test.go +++ /dev/null @@ -1,63 +0,0 @@ -/* -Copyright 2018 The Knative Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package main - -import ( - "testing" - - "github.com/knative/serving/pkg/logging" - "github.com/knative/serving/pkg/system" - "go.uber.org/zap/zapcore" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestReceiveLoggingConfigMap(t *testing.T) { - logger, atomicLevel := logging.NewLogger("", "debug") - want := zapcore.DebugLevel - if atomicLevel.Level() != zapcore.DebugLevel { - t.Fatalf("Expected initial logger level to %v, got: %v", want, atomicLevel.Level()) - } - - receiveFunc := receiveLoggingConfig(logger, atomicLevel) - cm := &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: system.Namespace, - Name: "config-logging", - }, - Data: map[string]string{ - "zap-logger-config": "", - "loglevel.controller": "info", - }, - } - - for _, test := range []struct { - l zapcore.Level - s string - }{ - {zapcore.InfoLevel, "info"}, - {zapcore.DebugLevel, "debug"}, - {zapcore.ErrorLevel, "error"}, - {zapcore.ErrorLevel, "invalid level"}, - } { - cm.Data["loglevel.controller"] = test.s - receiveFunc(cm) - if atomicLevel.Level() != test.l { - t.Errorf("Expected logger level to be %v, got: %v", test.l, atomicLevel.Level()) - } - } -} diff --git a/cmd/webhook/main.go b/cmd/webhook/main.go index 1bd91cabeda3..8c672b13671d 100644 --- a/cmd/webhook/main.go +++ b/cmd/webhook/main.go @@ -24,6 +24,7 @@ import ( "github.com/knative/serving/pkg/configmap" "github.com/knative/serving/pkg/logging" + "github.com/knative/serving/pkg/logging/logkey" "github.com/knative/serving/pkg/signals" "github.com/knative/serving/pkg/system" "github.com/knative/serving/pkg/webhook" @@ -32,6 +33,10 @@ import ( "k8s.io/client-go/rest" ) +const ( + logLevelKey = "webhook" +) + func main() { flag.Parse() cm, err := configmap.Load("/etc/config-logging") @@ -42,8 +47,9 @@ func main() { if err != nil { log.Fatalf("Error parsing logging configuration: %v", err) } - logger, _ := logging.NewLoggerFromConfig(config, "webhook") + logger, atomicLevel := logging.NewLoggerFromConfig(config, logLevelKey) defer logger.Sync() + logger = logger.With(zap.String(logkey.ControllerType, "webhook")) logger.Info("Starting the Configuration Webhook") @@ -55,11 +61,18 @@ func main() { logger.Fatal("Failed to get in cluster config", zap.Error(err)) } - clientset, err := kubernetes.NewForConfig(clusterConfig) + kubeClient, err := kubernetes.NewForConfig(clusterConfig) if err != nil { logger.Fatal("Failed to get the client set", zap.Error(err)) } + // 
Watch the logging config map and dynamically update logging levels.
+	configMapWatcher := configmap.NewDefaultWatcher(kubeClient, system.Namespace)
+	configMapWatcher.Watch(logging.ConfigName, logging.UpdateLevelFromConfigMap(logger, atomicLevel, logLevelKey))
+	if err = configMapWatcher.Start(stopCh); err != nil {
+		logger.Fatalf("failed to start configuration manager: %v", err)
+	}
+
 	options := webhook.ControllerOptions{
 		ServiceName:      "webhook",
 		ServiceNamespace: system.Namespace,
@@ -67,7 +80,7 @@ func main() {
 		SecretName:       "webhook-certs",
 		WebhookName:      "webhook.knative.dev",
 	}
-	controller, err := webhook.NewAdmissionController(clientset, options, logger)
+	controller, err := webhook.NewAdmissionController(kubeClient, options, logger)
 	if err != nil {
 		logger.Fatal("Failed to create the admission controller", zap.Error(err))
 	}
diff --git a/config/activator.yaml b/config/activator.yaml
index cfda00c92304..28a0ef3d66eb 100644
--- a/config/activator.yaml
+++ b/config/activator.yaml
@@ -39,9 +39,10 @@ spec:
         - name: http
           containerPort: 8080
         args:
-        - "-logtostderr=true"
-        - "-stderrthreshold=INFO"
-
+        # Disable glog writing into stderr. Our code doesn't use glog
+        # and seeing k8s logs in addition to ours is not useful.
+        - "-logtostderr=false"
+        - "-stderrthreshold=FATAL"
         volumeMounts:
        - name: config-logging
          mountPath: /etc/config-logging
diff --git a/config/config-logging.yaml b/config/config-logging.yaml
index b9f66fa073b7..de338a67c688 100644
--- a/config/config-logging.yaml
+++ b/config/config-logging.yaml
@@ -42,6 +42,9 @@ data:
     }
 
   # Log level overrides
+  # For all components except the autoscaler and queue proxy,
+  # changes are picked up immediately.
+  # For the autoscaler and queue proxy, changes require recreating the pods.
   loglevel.controller: "info"
   loglevel.autoscaler: "info"
   loglevel.queueproxy: "info"
diff --git a/config/monitoring/150-elasticsearch-prod/100-fluentd-configmap.yaml b/config/monitoring/150-elasticsearch-prod/100-fluentd-configmap.yaml
deleted file mode 100644
index 63ef0f282d37..000000000000
--- a/config/monitoring/150-elasticsearch-prod/100-fluentd-configmap.yaml
+++ /dev/null
@@ -1,82 +0,0 @@
-# Copyright 2018 The Knative Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -kind: ConfigMap -apiVersion: v1 -metadata: - name: fluentd-ds-config - namespace: monitoring - labels: - addonmanager.kubernetes.io/mode: Reconcile -data: - 100.system.conf: |- - - root_dir /tmp/fluentd-buffers/ - - 200.containers.input.conf: |- - - @id fluentd-containers.log - @type tail - path /var/log/containers/*user-container-*.log,/var/log/containers/*build-step-*.log - pos_file /var/log/es-containers.log.pos - time_format %Y-%m-%dT%H:%M:%S.%NZ - tag raw.kubernetes.* - format json - read_from_head true - - # Combine multi line logs which form an exception stack trace into a single log entry - - @id raw.kubernetes - @type detect_exceptions - remove_tag_prefix raw - message log - stream stream - multiline_flush_interval 5 - max_bytes 500000 - max_lines 1000 - - # Add Kubernetes metadata - - @type kubernetes_metadata - - 300.forward.input.conf: |- - # Takes the messages sent over TCP, e.g. request logs from Istio - - @type forward - port 24224 - - 900.output.conf: |- - # Send to Elastic Search - - @id elasticsearch - @type elasticsearch - @log_level info - include_tag_key true - host elasticsearch-logging - port 9200 - logstash_format true - - @type file - path /var/log/fluentd-buffers/kubernetes.system.buffer - flush_mode interval - retry_type exponential_backoff - flush_thread_count 2 - flush_interval 5s - retry_forever - retry_max_interval 30 - chunk_limit_size 2M - queue_limit_length 8 - overflow_action block - - diff --git a/config/monitoring/150-elasticsearch-prod/100-scaling-configmap.yaml b/config/monitoring/150-elasticsearch-prod/100-scaling-configmap.yaml deleted file mode 100644 index 997671cb1cd8..000000000000 --- a/config/monitoring/150-elasticsearch-prod/100-scaling-configmap.yaml +++ /dev/null @@ -1,455 +0,0 @@ -# Copyright 2018 The Knative Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -apiVersion: v1 -kind: ConfigMap -metadata: - name: scaling-config - namespace: monitoring -data: - scaling-dashboard.json: |+ - { - "__inputs": [ - { - "description": "", - "label": "prometheus", - "name": "prometheus", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Knative Serving - Scaling", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1525724908045, - "links": [], - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 17, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 2, - "legend": { - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Panic Mode", - "color": "#f29191", - "dashes": true, - "fill": 2, - "linewidth": 2, - "steppedLine": true, - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "autoscaler_actual_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"}", - "format": "time_series", - "interval": "1s", - "intervalFactor": 1, - "legendFormat": "Actual Pods", - "refId": "A" - }, - { - "expr": "autoscaler_desired_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"} ", - "format": "time_series", - "interval": "1s", - "intervalFactor": 1, - "legendFormat": "Desired Pods", - "refId": "B" - }, - { - "expr": "autoscaler_requested_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"} ", - "format": "time_series", - "interval": "1s", - "intervalFactor": 1, - "legendFormat": "Requested Pods", - "refId": "C" - }, - { - "expr": "autoscaler_panic_mode{configuration_namespace=\"$namespace\", configuration=\"$configuration\", revision=\"$revision\"} ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "1s", - "intervalFactor": 1, - "legendFormat": "Panic Mode", - "refId": "D" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Revision Pod Counts", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": "Panic Mode", - "logBase": 1, - "max": "1.0", - "min": "0", - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 17 - }, - "id": 4, - "legend": { - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": 
"sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Cores requested", - "refId": "A" - }, - { - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=~\"$revision-deployment-.*\"}[1m]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Cores used", - "refId": "B" - }, - { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Core limit", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Revision CPU Usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 17 - }, - "id": 6, - "legend": { - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Memory requested", - "refId": "A" - }, - { - "expr": "sum(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=~\"$revision-deployment-.*\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Memory used", - "refId": "B" - }, - { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=~\"$revision-deployment-.*\"})", - "format": "time_series", - "intervalFactor": 1, - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Pod Memory Usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "refresh": "5s", - "schemaVersion": 16, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [], - "query": "label_values(autoscaler_actual_pod_count, configuration_namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - 
"tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": "Configuration", - "multi": false, - "name": "configuration", - "options": [], - "query": "label_values(autoscaler_actual_pod_count{configuration_namespace=\"$namespace\"}, configuration)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": "Revision", - "multi": false, - "name": "revision", - "options": [], - "query": "label_values(autoscaler_actual_pod_count{configuration_namespace=\"$namespace\", configuration=\"$configuration\"}, revision)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-15m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Knative Serving - Scaling", - "uid": "u_-9SIMiz", - "version": 4 - } \ No newline at end of file diff --git a/config/monitoring/150-elasticsearch-dev/100-fluentd-configmap.yaml b/config/monitoring/150-elasticsearch/100-fluentd-configmap.yaml similarity index 90% rename from config/monitoring/150-elasticsearch-dev/100-fluentd-configmap.yaml rename to config/monitoring/150-elasticsearch/100-fluentd-configmap.yaml index af8bf88402a8..de9b33f8dafa 100644 --- a/config/monitoring/150-elasticsearch-dev/100-fluentd-configmap.yaml +++ b/config/monitoring/150-elasticsearch/100-fluentd-configmap.yaml @@ -28,7 +28,7 @@ data: @id fluentd-containers.log @type tail - path /var/log/containers/*user-container-*.log,/var/log/containers/*build-step-*.log,/var/log/containers/*controller-*.log,/var/log/containers/*webhook-*.log,/var/log/containers/*autoscaler-*.log,/var/log/containers/*queue-proxy-*.log,/var/log/containers/*activator-*.log + path /var/log/containers/*user-container-*.log,/var/log/containers/*build-step-*.log,/var/log/containers/controller-*controller-*.log,/var/log/containers/webhook-*webhook-*.log,/var/log/containers/*autoscaler-*autoscaler-*.log,/var/log/containers/*queue-proxy-*.log,/var/log/containers/activator-*activator-*.log pos_file /var/log/es-containers.log.pos time_format %Y-%m-%dT%H:%M:%S.%NZ tag raw.kubernetes.* diff --git a/config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml b/config/monitoring/150-elasticsearch/100-scaling-configmap.yaml similarity index 100% rename from config/monitoring/150-elasticsearch-dev/100-scaling-configmap-dev.yaml rename to config/monitoring/150-elasticsearch/100-scaling-configmap.yaml diff --git a/config/monitoring/150-stackdriver-prod/fluentd-configmap.yaml b/config/monitoring/150-stackdriver-prod/fluentd-configmap.yaml deleted file mode 100644 index e793782074e3..000000000000 --- a/config/monitoring/150-stackdriver-prod/fluentd-configmap.yaml +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2018 The Knative Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -kind: ConfigMap -apiVersion: v1 -metadata: - name: fluentd-ds-config - namespace: monitoring - labels: - addonmanager.kubernetes.io/mode: Reconcile -data: - 100.system.conf: |- - - root_dir /tmp/fluentd-buffers/ - - 200.containers.input.conf: |- - - @id fluentd-containers.log - @type tail - # path is different from dev configuration - path /var/log/containers/*user-container-*.log,/var/log/containers/*build-step-*.log - pos_file /var/log/es-containers.log.pos - time_format %Y-%m-%dT%H:%M:%S.%NZ - tag raw.kubernetes.* - format json - read_from_head true - - # Combine multi line logs which form an exception stack trace into a single log entry - - @id raw.kubernetes - @type detect_exceptions - remove_tag_prefix raw - message log - stream stream - multiline_flush_interval 5 - max_bytes 500000 - max_lines 1000 - - # Add Kubernetes metadata - - @type kubernetes_metadata - merge_json_log false # Don't parse json log - preserve_json_log false - - 300.forward.input.conf: |- - # Takes the messages sent over TCP, e.g. request logs from Istio - - @type forward - port 24224 - - 900.output.conf: |- - # Send to Stackdriver - # google_cloud plugin moves `kubernetes` metadata to `labels`. - - @type google_cloud - - # Try to detect JSON formatted log entries. - detect_json true - # Allow log entries from multiple containers to be sent in the same request. - split_logs_by_tag false - # Set the buffer type to file to improve the reliability and reduce the memory consumption - buffer_type file - buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer - # Set queue_full action to block because we want to pause gracefully - # in case of the off-the-limits load instead of throwing an exception - buffer_queue_full_action block - # Set the chunk limit conservatively to avoid exceeding the recommended - # chunk size of 5MB per write request. - buffer_chunk_limit 1M - # Cap the combined memory usage of this buffer and the one below to - # 1MiB/chunk * (6 + 2) chunks = 8 MiB - buffer_queue_limit 6 - # Never wait more than 5 seconds before flushing logs in the non-error case. - flush_interval 5s - # Never wait longer than 30 seconds between retries. - max_retry_wait 30 - # Disable the limit on the number of retries (retry forever). - disable_retry_limit - # Use multiple threads for processing. 
- num_threads 2 - use_grpc true - diff --git a/config/monitoring/150-stackdriver-dev/fluentd-configmap.yaml b/config/monitoring/150-stackdriver/fluentd-configmap.yaml similarity index 98% rename from config/monitoring/150-stackdriver-dev/fluentd-configmap.yaml rename to config/monitoring/150-stackdriver/fluentd-configmap.yaml index 8dd2e562aa43..0cf4377e2c16 100644 --- a/config/monitoring/150-stackdriver-dev/fluentd-configmap.yaml +++ b/config/monitoring/150-stackdriver/fluentd-configmap.yaml @@ -28,7 +28,6 @@ data: @id fluentd-containers.log @type tail - # path is different from prod configuration path /var/log/containers/*user-container-*.log,/var/log/containers/*build-step-*.log,/var/log/containers/*controller-*.log,/var/log/containers/*webhook-*.log,/var/log/containers/*autoscaler-*.log,/var/log/containers/*queue-proxy-*.log,/var/log/containers/*activator-*.log pos_file /var/log/es-containers.log.pos time_format %Y-%m-%dT%H:%M:%S.%NZ diff --git a/config/monitoring/README.md b/config/monitoring/README.md index 19dbf50dcc98..29e99af30516 100644 --- a/config/monitoring/README.md +++ b/config/monitoring/README.md @@ -6,7 +6,7 @@ monitoring components by running the following at the root of the repository: ```shell kubectl apply -R -f config/monitoring/100-common \ - -f config/monitoring/150-elasticsearch-prod \ + -f config/monitoring/150-elasticsearch \ -f third_party/config/monitoring/common \ -f third_party/config/monitoring/elasticsearch \ -f config/monitoring/200-common \ @@ -26,9 +26,8 @@ a three digit prefix is added. * The root folder (`config/monitoring`) is special. It requires the following installation ordering: * `/config/monitoring/100-common` - * Only one of `/config/monitoring/150-*`. File with `dev` postfix is a special - configuration that enables verbose logging and should only be used for development - purposes. File with `elasticsearch` or `stackdriver` indicates the logging destination. + * Only one of `/config/monitoring/150-*`. + File with `elasticsearch` or `stackdriver` indicates the logging destination. * `/third_party/config/monitoring/common` * `/third_party/config/monitoring/elasticsearch`. Required only when Elasticsearch is used as logging destination. * `/config/monitoring/200-common` diff --git a/docs/setting-up-a-logging-plugin.md b/docs/setting-up-a-logging-plugin.md index 81cda8039a9e..80b2a4074550 100644 --- a/docs/setting-up-a-logging-plugin.md +++ b/docs/setting-up-a-logging-plugin.md @@ -68,7 +68,7 @@ kubectl apply -f \ ``` In the commands above, replace `` with the -Fluentd DaemonSet configuration file, e.g. `config/monitoring/150-stackdriver-prod`. +Fluentd DaemonSet configuration file, e.g. `config/monitoring/150-stackdriver`. **NOTE**: Operators sometimes need to deploy extra services as the logging backends. For example, if they desire Elasticsearch&Kibana, they have to deploy diff --git a/docs/telemetry.md b/docs/telemetry.md index a834da7b19c2..36aa5d5a28b6 100644 --- a/docs/telemetry.md +++ b/docs/telemetry.md @@ -1,202 +1,15 @@ -# Logs and metrics +# Logs, metrics and traces -## Monitoring components setup - -First, deploy monitoring components. - -### Elasticsearch, Kibana, Prometheus, and Grafana Setup - -You can use two different setups: - -1. **150-elasticsearch-prod**: This configuration collects logs & metrics from -user containers, build controller and Istio requests. 
- - ```shell - kubectl apply -R -f config/monitoring/100-common \ - -f config/monitoring/150-elasticsearch-prod \ - -f third_party/config/monitoring/common \ - -f third_party/config/monitoring/elasticsearch \ - -f config/monitoring/200-common \ - -f config/monitoring/200-common/100-istio.yaml - ``` - -1. **150-elasticsearch-dev**: This configuration collects everything **150 --elasticsearch-prod** does, plus Knative Serving controller logs. - - ```shell - kubectl apply -R -f config/monitoring/100-common \ - -f config/monitoring/150-elasticsearch-dev \ - -f third_party/config/monitoring/common \ - -f third_party/config/monitoring/elasticsearch \ - -f config/monitoring/200-common \ - -f config/monitoring/200-common/100-istio.yaml - ``` - -### Stackdriver, Prometheus, and Grafana Setup - -If your Knative Serving is not built on a Google Cloud Platform based cluster, -or you want to send logs to another GCP project, you need to build your own -Fluentd image and modify the configuration first. See - -1. [Fluentd image on Knative Serving](/image/fluentd/README.md) -2. [Setting up a logging plugin](setting-up-a-logging-plugin.md) - -Then you can use two different setups: - -1. **150-stackdriver-prod**: This configuration collects logs and metrics from -user containers, build controller, and Istio requests. - -```shell -kubectl apply -R -f config/monitoring/100-common \ - -f config/monitoring/150-stackdriver-prod \ - -f third_party/config/monitoring/common \ - -f config/monitoring/200-common \ - -f config/monitoring/200-common/100-istio.yaml -``` - -2. **150-stackdriver-dev**: This configuration collects everything **150 --stackdriver-prod** does, plus Knative Serving controller logs. - -```shell -kubectl apply -R -f config/monitoring/100-common \ - -f config/monitoring/150-stackdriver-dev \ - -f third_party/config/monitoring/common \ - -f config/monitoring/200-common \ - -f config/monitoring/200-common/100-istio.yaml -``` - -## Accessing logs - -### Kibana and Elasticsearch - -To open the Kibana UI (the visualization tool for [Elasticsearch](https://info.elastic.co), -enter the following command: - -```shell -kubectl proxy -``` - -This starts a local proxy of Kibana on port 8001. The Kibana UI is only exposed within -the cluster for security reasons. - -Navigate to the [Kibana UI](http://localhost:8001/api/v1/namespaces/monitoring/services/kibana-logging/proxy/app/kibana) -(*It might take a couple of minutes for the proxy to work*). - -When Kibana is opened the first time, it will ask you to create an index. -Accept the default options: - -![Kibana UI Configuring an Index Pattern](images/kibana-landing-page-configure-index.png) - -The Discover tab of the Kibana UI looks like this: - -![Kibana UI Discover tab](images/kibana-discover-tab-annotated.png) - -You can change the time frame of logs Kibana displays in the upper right corner -of the screen. The main search bar is across the top of the Dicover page. - -As more logs are ingested, new fields will be discovered. To have them indexed, -go to Management > Index Patterns > Refresh button (on top right) > Refresh -fields. - - - -#### Accessing configuration and revision logs - -To access the logs for a configuration, enter the following search query in Kibana: - -``` -kubernetes.labels.serving_knative_dev\/configuration: "configuration-example" -``` - -Replace `configuration-example` with your configuration's name. 
Enter the following -command to get your configuration's name: - -```shell -kubectl get configurations -``` - -To access logs for a revision, enter the following search query in Kibana: - -``` -kubernetes.labels.serving_knative_dev\/revision: "configuration-example-00001" -``` - -Replace `configuration-example-00001` with your revision's name. - -#### Accessing build logs - -To access the logs for a build, enter the following search query in Kibana: - -``` -kubernetes.labels.build\-name: "test-build" -``` - -Replace `test-build` with your build's name. The build name is specified in the `.yaml` file as follows: - -```yaml -apiVersion: build.knative.dev/v1alpha1 -kind: Build -metadata: - name: test-build -``` - -### Stackdriver - -Go to the [Google Cloud Console logging page](https://console.cloud.google.com/logs/viewer) for -your GCP project which stores your logs via Stackdriver. - -## Accessing metrics - -Enter: - -```shell -kubectl port-forward -n monitoring $(kubectl get pods -n monitoring --selector=app=grafana --output=jsonpath="{.items..metadata.name}") 3000 -``` - -Then open the Grafana UI at [http://localhost:3000](http://localhost:3000). The following dashboards are -pre-installed with Knative Serving: - -* **Revision HTTP Requests:** HTTP request count, latency and size metrics per revision and per configuration -* **Nodes:** CPU, memory, network and disk metrics at node level -* **Pods:** CPU, memory and network metrics at pod level -* **Deployment:** CPU, memory and network metrics aggregated at deployment level -* **Istio, Mixer and Pilot:** Detailed Istio mesh, Mixer and Pilot metrics -* **Kubernetes:** Dashboards giving insights into cluster health, deployments and capacity usage - -### Accessing per request traces - -Before you can view per request metrics, you'll need to create a new index pattern that will store -per request traces captured by Zipkin: - -1. Start the Kibana UI serving on local port 8001 by entering the following command: - - ```shell - kubectl proxy - ``` - -1. Open the [Kibana UI](http://localhost:8001/api/v1/namespaces/monitoring/services/kibana-logging/proxy/app/kibana). - -1. Navigate to Management -> Index Patterns -> Create Index Pattern. - -1. Enter `zipkin*` in the "Index pattern" text field. - -1. Click **Create**. - -After you've created the Zipkin index pattern, open the -[Zipkin UI](http://localhost:8001/api/v1/namespaces/istio-system/services/zipkin:9411/proxy/zipkin/). -Click on "Find Traces" to see the latest traces. You can search for a trace ID -or look at traces of a specific application. Click on a trace to see a detailed -view of a specific call. - -To see a demo of distributed tracing, deploy the -[Telemetry sample](../sample/telemetrysample/README.md), send some traffic to it, -then explore the traces it generates from Zipkin UI. - - +Install monitoring components using +[Monitoring, Logging and Tracing Installation](https://github.com/knative/docs/blob/master/serving/installing-logging-metrics-traces.md). +Once finished, visit +[Knative Serving](https://github.com/knative/docs/tree/master/serving) +for guides on accessing logs, metrics and traces. ## Default metrics The following metrics are collected by default: + * Knative Serving controller metrics * Istio metrics (mixer, envoy and pilot) * Node and pod metrics @@ -306,7 +119,7 @@ func main() { } ``` -3. 
In your code where you want to instrument, set the counter with the
+3. In your code where you want to instrument, set the counter with the
 appropriate label values - example:
 
 ```go
@@ -318,7 +131,7 @@ tag.New(
 stats.Record(ctx, desiredPodCountM.M({Measurement Value}))
 ```
 
-4. Add the following to scape config file located at
+4. Add the following to the scrape config file located at
 config/monitoring/200-common/300-prometheus/100-scrape-config.yaml:
 
 ```yaml
@@ -348,25 +161,28 @@ config/monitoring/200-common/300-prometheus/100-scrape-config.yaml:
     replacement: $1
 ```
 
-5. Redeploy prometheus and its configuration:
+5. Redeploy Prometheus and its configuration:
+
 ```sh
 kubectl delete -f config/monitoring/200-common/300-prometheus
 kubectl apply -f config/monitoring/200-common/300-prometheus
 ```
 
-6. Add a dashboard for your metrics - you can see examples of it under
+6. Add a dashboard for your metrics - you can see examples of it under
 config/grafana/dashboard-definition folder. An easy way to generate JSON
 definitions is to use Grafana UI (make sure to login with as admin user) and
 [export JSON](http://docs.grafana.org/reference/export_import) from it.
 
-7. Validate the metrics flow either by Grafana UI or Prometheus UI (see
+7. Validate the metrics flow either by Grafana UI or Prometheus UI (see
 Troubleshooting section above to enable Prometheus UI)
 
 ## Distributed tracing with Zipkin
-
-Check [Telemetry sample](../sample/telemetrysample/README.md) as an example usage of
-[OpenZipkin](https://zipkin.io/pages/existing_instrumentations)'s Go client library.
+
+Check [Telemetry sample](https://github.com/knative/docs/tree/master/serving/samples/telemetry-go)
+as an example usage of [OpenZipkin](https://zipkin.io/pages/existing_instrumentations)'s Go client library.
 
 ## Delete monitoring components
+
 Enter:
 
 ```shell
diff --git a/hack/release.sh b/hack/release.sh
index 83f7c9e55a9d..8b5b965a64c8 100755
--- a/hack/release.sh
+++ b/hack/release.sh
@@ -76,7 +76,7 @@ cp ${OUTPUT_YAML} ${LITE_YAML}
 cp ${OUTPUT_YAML} ${NO_MON_YAML}
 
 # Use ko to concatenate them all together.
ko resolve ${KO_FLAGS} -R -f config/monitoring/100-common \ - -f config/monitoring/150-elasticsearch-prod \ + -f config/monitoring/150-elasticsearch \ -f third_party/config/monitoring/common \ -f third_party/config/monitoring/elasticsearch \ -f config/monitoring/200-common \ @@ -86,7 +86,7 @@ ko resolve ${KO_FLAGS} -R -f config/monitoring/100-common \ -f third_party/config/monitoring/common/istio \ -f third_party/config/monitoring/common/kubernetes/kube-state-metrics \ -f third_party/config/monitoring/common/prometheus-operator \ - -f config/monitoring/150-elasticsearch-prod/100-scaling-configmap.yaml \ + -f config/monitoring/150-elasticsearch/100-scaling-configmap.yaml \ -f config/monitoring/200-common/100-fluentd.yaml \ -f config/monitoring/200-common/100-grafana-dash-knative-efficiency.yaml \ -f config/monitoring/200-common/100-grafana-dash-knative.yaml \ diff --git a/pkg/controller/configuration/configuration.go b/pkg/controller/configuration/configuration.go index 2d8c57f66527..4307b8ab112d 100644 --- a/pkg/controller/configuration/configuration.go +++ b/pkg/controller/configuration/configuration.go @@ -33,7 +33,6 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/tools/cache" ) @@ -97,7 +96,7 @@ func (c *Controller) Reconcile(key string) error { // Convert the namespace/name string into a distinct namespace and name namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { - runtime.HandleError(fmt.Errorf("invalid resource key: %s", key)) + c.Logger.Errorf("invalid resource key: %s", key) return nil } // Wrap our logger with the additional context of the configuration that we are reconciling. @@ -108,7 +107,7 @@ func (c *Controller) Reconcile(key string) error { original, err := c.configurationLister.Configurations(namespace).Get(name) if errors.IsNotFound(err) { // The resource no longer exists, in which case we stop processing. - runtime.HandleError(fmt.Errorf("configuration %q in work queue no longer exists", key)) + logger.Errorf("configuration %q in work queue no longer exists", key) return nil } else if err != nil { return err diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index f7cf0caed879..8e6d62d69c37 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -156,7 +156,7 @@ func (c *Base) Enqueue(obj interface{}) { var key string var err error if key, err = cache.DeletionHandlingMetaNamespaceKeyFunc(obj); err != nil { - runtime.HandleError(err) + c.Logger.Error(zap.Error(err)) return } c.EnqueueKey(key) @@ -170,7 +170,7 @@ func (c *Base) EnqueueControllerOf(obj interface{}) { // to enqueue the last known owner. object, err := meta.Accessor(obj) if err != nil { - runtime.HandleError(err) + c.Logger.Error(zap.Error(err)) return } @@ -247,7 +247,7 @@ func (c *Base) processNextWorkItem(syncHandler func(string) error) bool { // Forget here else we'd go into a loop of attempting to // process a work item that is invalid. 
c.WorkQueue.Forget(obj) - runtime.HandleError(fmt.Errorf("expected string in workqueue but got %#v", obj)) + c.Logger.Errorf("expected string in workqueue but got %#v", obj) return nil } // Run the syncHandler, passing it the namespace/name string of the @@ -263,7 +263,7 @@ func (c *Base) processNextWorkItem(syncHandler func(string) error) bool { }(obj) if err != nil { - runtime.HandleError(err) + c.Logger.Error(zap.Error(err)) return true } diff --git a/pkg/controller/revision/resources/autoscaler.go b/pkg/controller/revision/resources/autoscaler.go index ec583c5ab1e6..8c9dc85c5a98 100644 --- a/pkg/controller/revision/resources/autoscaler.go +++ b/pkg/controller/revision/resources/autoscaler.go @@ -139,6 +139,10 @@ func MakeAutoscalerDeployment(rev *v1alpha1.Revision, autoscalerImage string, re }}, Args: []string{ fmt.Sprintf("-concurrencyModel=%v", rev.Spec.ConcurrencyModel), + // Disable glog writing into stderr. Our code doesn't use glog + // and seeing k8s logs in addition to ours is not useful. + "-logtostderr=false", + "-stderrthreshold=FATAL", }, VolumeMounts: autoscalerVolumeMounts, }}, diff --git a/pkg/controller/revision/resources/autoscaler_test.go b/pkg/controller/revision/resources/autoscaler_test.go index feca46a9e135..bb9ce166d280 100644 --- a/pkg/controller/revision/resources/autoscaler_test.go +++ b/pkg/controller/revision/resources/autoscaler_test.go @@ -208,7 +208,7 @@ func TestMakeAutoscalerDeployment(t *testing.T) { Name: "SERVING_AUTOSCALER_PORT", Value: strconv.Itoa(AutoscalerPort), }}, - Args: []string{"-concurrencyModel=Single"}, + Args: []string{"-concurrencyModel=Single", "-logtostderr=false", "-stderrthreshold=FATAL"}, VolumeMounts: autoscalerVolumeMounts, }}, ServiceAccountName: "autoscaler", @@ -293,7 +293,7 @@ func TestMakeAutoscalerDeployment(t *testing.T) { Name: "SERVING_AUTOSCALER_PORT", Value: strconv.Itoa(AutoscalerPort), }}, - Args: []string{"-concurrencyModel=Multi"}, + Args: []string{"-concurrencyModel=Multi", "-logtostderr=false", "-stderrthreshold=FATAL"}, VolumeMounts: autoscalerVolumeMounts, }}, ServiceAccountName: "autoscaler", @@ -385,7 +385,7 @@ func TestMakeAutoscalerDeployment(t *testing.T) { Name: "SERVING_AUTOSCALER_PORT", Value: strconv.Itoa(AutoscalerPort), }}, - Args: []string{"-concurrencyModel=Multi"}, + Args: []string{"-concurrencyModel=Multi", "-logtostderr=false", "-stderrthreshold=FATAL"}, VolumeMounts: autoscalerVolumeMounts, }}, ServiceAccountName: "autoscaler", @@ -476,7 +476,7 @@ func TestMakeAutoscalerDeployment(t *testing.T) { Name: "SERVING_AUTOSCALER_PORT", Value: strconv.Itoa(AutoscalerPort), }}, - Args: []string{"-concurrencyModel=Multi"}, + Args: []string{"-concurrencyModel=Multi", "-logtostderr=false", "-stderrthreshold=FATAL"}, VolumeMounts: autoscalerVolumeMounts, }}, ServiceAccountName: "autoscaler", @@ -567,7 +567,7 @@ func TestMakeAutoscalerDeployment(t *testing.T) { Name: "SERVING_AUTOSCALER_PORT", Value: strconv.Itoa(AutoscalerPort), }}, - Args: []string{"-concurrencyModel=Multi"}, + Args: []string{"-concurrencyModel=Multi", "-logtostderr=false", "-stderrthreshold=FATAL"}, VolumeMounts: autoscalerVolumeMounts, }}, ServiceAccountName: "autoscaler", diff --git a/pkg/controller/revision/revision.go b/pkg/controller/revision/revision.go index 03885006d431..aea6850d40c0 100644 --- a/pkg/controller/revision/revision.go +++ b/pkg/controller/revision/revision.go @@ -53,7 +53,6 @@ import ( apierrs "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/runtime" 
appsv1listers "k8s.io/client-go/listers/apps/v1" corev1listers "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" @@ -226,7 +225,7 @@ func (c *Controller) Reconcile(key string) error { // Convert the namespace/name string into a distinct namespace and name namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { - runtime.HandleError(fmt.Errorf("invalid resource key: %s", key)) + c.Logger.Errorf("invalid resource key: %s", key) return nil } @@ -238,7 +237,7 @@ func (c *Controller) Reconcile(key string) error { original, err := c.revisionLister.Revisions(namespace).Get(name) // The resource may no longer exist, in which case we stop processing. if apierrs.IsNotFound(err) { - runtime.HandleError(fmt.Errorf("revision %q in work queue no longer exists", key)) + logger.Errorf("revision %q in work queue no longer exists", key) return nil } else if err != nil { return err diff --git a/pkg/controller/route/route.go b/pkg/controller/route/route.go index e04817d74212..53e92e2f67cb 100644 --- a/pkg/controller/route/route.go +++ b/pkg/controller/route/route.go @@ -24,7 +24,6 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apierrs "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/util/runtime" corev1informers "k8s.io/client-go/informers/core/v1" corev1listers "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" @@ -128,7 +127,7 @@ func (c *Controller) Reconcile(key string) error { // Convert the namespace/name string into a distinct namespace and name namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { - runtime.HandleError(fmt.Errorf("invalid resource key: %s", key)) + c.Logger.Errorf("invalid resource key: %s", key) return nil } @@ -139,7 +138,7 @@ func (c *Controller) Reconcile(key string) error { original, err := c.routeLister.Routes(namespace).Get(name) if apierrs.IsNotFound(err) { // The resource may no longer exist, in which case we stop processing. 
- runtime.HandleError(fmt.Errorf("route %q in work queue no longer exists", key)) + logger.Errorf("route %q in work queue no longer exists", key) return nil } else if err != nil { return err @@ -230,7 +229,6 @@ func (c *Controller) EnqueueReferringRoute(obj interface{}) { return } if config.Status.LatestReadyRevisionName == "" { - fmt.Printf("Configuration %s is not ready\n", config.Name) c.Logger.Infof("Configuration %s is not ready", config.Name) return } diff --git a/pkg/controller/service/service.go b/pkg/controller/service/service.go index 6d4a7c393eb8..e6b5f356afca 100644 --- a/pkg/controller/service/service.go +++ b/pkg/controller/service/service.go @@ -18,7 +18,6 @@ package service import ( "context" - "fmt" "reflect" "github.com/google/go-cmp/cmp" @@ -27,7 +26,6 @@ import ( "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" apierrs "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/tools/cache" "github.com/knative/serving/pkg/apis/serving/v1alpha1" @@ -113,7 +111,7 @@ func (c *Controller) Reconcile(key string) error { // Convert the namespace/name string into a distinct namespace and name namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { - runtime.HandleError(fmt.Errorf("invalid resource key: %s", key)) + c.Logger.Errorf("invalid resource key: %s", key) return nil } @@ -125,7 +123,7 @@ func (c *Controller) Reconcile(key string) error { original, err := c.serviceLister.Services(namespace).Get(name) if apierrs.IsNotFound(err) { // The resource may no longer exist, in which case we stop processing. - runtime.HandleError(fmt.Errorf("service %q in work queue no longer exists", key)) + logger.Errorf("service %q in work queue no longer exists", key) return nil } else if err != nil { return err diff --git a/pkg/logging/config.go b/pkg/logging/config.go index a1086f537b9d..62753ffeae32 100644 --- a/pkg/logging/config.go +++ b/pkg/logging/config.go @@ -131,3 +131,22 @@ func levelFromString(level string) (*zapcore.Level, error) { } return &zapLevel, nil } + +// UpdateLevelFromConfigMap returns a helper func that can be used to update the logging level +// when a config map is updated +func UpdateLevelFromConfigMap(logger *zap.SugaredLogger, atomicLevel zap.AtomicLevel, levelKey string) func(configMap *corev1.ConfigMap) { + return func(configMap *corev1.ConfigMap) { + loggingConfig, err := NewConfigFromConfigMap(configMap) + if err != nil { + logger.Error("Failed to parse the logging configmap. 
Previous config map will be used.", zap.Error(err)) + return + } + + if level, ok := loggingConfig.LoggingLevel[levelKey]; ok { + if atomicLevel.Level() != level { + logger.Infof("Updating logging level for %v from %v to %v.", levelKey, atomicLevel.Level(), level) + atomicLevel.SetLevel(level) + } + } + } +} diff --git a/pkg/logging/config_test.go b/pkg/logging/config_test.go index 6b6324cbf383..76a2eeaa5115 100644 --- a/pkg/logging/config_test.go +++ b/pkg/logging/config_test.go @@ -234,3 +234,42 @@ func getTestConfig() (*Config, string, string) { }) return c, wantCfg, wantLevel } + +func TestUpdateLevelFromConfigMap(t *testing.T) { + logger, atomicLevel := NewLogger("", "debug") + want := zapcore.DebugLevel + if atomicLevel.Level() != zapcore.DebugLevel { + t.Fatalf("Expected initial logger level to %v, got: %v", want, atomicLevel.Level()) + } + + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: system.Namespace, + Name: "config-logging", + }, + Data: map[string]string{ + "zap-logger-config": "", + "loglevel.controller": "panic", + }, + } + + tests := []struct { + setLevel string + wantLevel zapcore.Level + }{ + {"info", zapcore.InfoLevel}, + {"error", zapcore.ErrorLevel}, + {"invalid", zapcore.ErrorLevel}, + {"debug", zapcore.DebugLevel}, + {"debug", zapcore.DebugLevel}, + } + + u := UpdateLevelFromConfigMap(logger, atomicLevel, "controller") + for _, tt := range tests { + cm.Data["loglevel.controller"] = tt.setLevel + u(cm) + if atomicLevel.Level() != tt.wantLevel { + t.Errorf("Invalid logging level. want: %v, got: %v", tt.wantLevel, atomicLevel.Level()) + } + } +} diff --git a/test/e2e-tests.sh b/test/e2e-tests.sh index 71758ff6fb42..8f3a7acad583 100755 --- a/test/e2e-tests.sh +++ b/test/e2e-tests.sh @@ -44,7 +44,7 @@ function create_istio() { function create_monitoring() { echo ">> Bringing up monitoring" kubectl apply -R -f config/monitoring/100-common \ - -f config/monitoring/150-elasticsearch-prod \ + -f config/monitoring/150-elasticsearch \ -f third_party/config/monitoring/common \ -f third_party/config/monitoring/elasticsearch \ -f config/monitoring/200-common \ @@ -68,7 +68,7 @@ function delete_istio() { function delete_monitoring() { echo ">> Bringing down monitoring" kubectl delete --ignore-not-found=true -f config/monitoring/100-common \ - -f config/monitoring/150-elasticsearch-prod \ + -f config/monitoring/150-elasticsearch \ -f third_party/config/monitoring/common \ -f third_party/config/monitoring/elasticsearch \ -f config/monitoring/200-common
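The net effect of the logging changes above is that most components now react to `config-logging` edits at runtime. A usage sketch, assuming the ConfigMap lives in the `knative-serving` namespace (the value of `system.Namespace`) and the controller pods are labeled `app: controller`:

```shell
# Raise the controller's log level to debug; UpdateLevelFromConfigMap picks this
# up through the config map watcher without restarting the pod.
kubectl -n knative-serving patch configmap config-logging \
  --type merge -p '{"data":{"loglevel.controller":"debug"}}'

# The controller should log the transition, e.g.
# "Updating logging level for controller from info to debug."
kubectl -n knative-serving logs -l app=controller | grep "Updating logging level"
```

For the autoscaler and queue proxy, the comment added to `config/config-logging.yaml` still applies: their pods must be recreated before a new level takes effect.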