From b43cf338def871e21c381b1090780c5362f4917e Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 20 May 2020 14:59:11 -0700 Subject: [PATCH] pkg/image/controller/scheduler: Restore some logging Reverting-ish the scheduler logging removed as part of c0b49058a7 (Remove debugging logs from scheduler component, not needed anymore, 2017-04-07). This should help troubleshoot scheduled ImageStreams that get stuck, like a cluster running the 4.3 07837c7258 (Merge pull request #91 from openshift-cherrypick-robot/cherry-pick-84-to-release-4.3, 2020-04-16) which had: $ oc describe is cli -n openshift Name: cli Namespace: openshift Created: 24 hours ago Labels: Annotations: Image Repository: default-route-openshift-image-registry.apps... Image Lookup: local=false Unique Images: 0 Tags: 1 latest updates automatically from registry quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:0a70026a8b193e61356054ab92ec825aac23c52ac5410c5e49c2412f92fe195b ! error: Import failed (InternalError): Internal error occurred: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:0a70026a8b193e61356054ab92ec825aac23c52ac5410c5e49c2412f92fe195b: Get https://quay.io/v2/openshift-release-dev/ocp-v4.0-art-dev/manifests/sha256:0a70026a8b193e61356054ab92ec825aac23c52ac5410c5e49c2412f92fe195b: received unexpected HTTP status: 500 Internal Server Error 24 hours ago But nothing useful in the controller logs: I0520 15:45:53.857152 1 controller_manager.go:42] Starting controllers on 0.0.0.0:8443 (07837c72) ... I0520 15:46:53.954352 1 scheduled_image_controller.go:67] Starting scheduled import controller ... W0520 15:47:13.662463 1 reflector.go:299] github.com/openshift/client-go/operator/informers/externalversions/factory.go:101: watch of *v1alpha1.ImageContentSourcePolicy ended with: an error on the server ("unable to decode an event from the watch stream: got short buffer with n=0, base=170, cap=2688") has prevented the request from succeeding ... I would expect... something... to show that the scheduled controller had the scheduled ImageStream in its queue and was attempting to retrieve it. The TODO I'm removing is from 01b8ae0ce4 (Improve godoc and add validation tests, 2016-01-28), but I think removing logging should be a non-goal (although I'm fine pushing logging up to higher V-levels as folks become more confident in the logged logic). --- pkg/image/controller/scheduler.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pkg/image/controller/scheduler.go b/pkg/image/controller/scheduler.go index 27b1c4833..58f9ea9fa 100644 --- a/pkg/image/controller/scheduler.go +++ b/pkg/image/controller/scheduler.go @@ -5,6 +5,7 @@ import ( utilwait "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/flowcontrol" + "k8s.io/klog" ) // NOTE: scheduler's semantics do not lend it for reuse elsewhere and its use in @@ -33,7 +34,6 @@ type bucket map[interface{}]interface{} // newScheduler creates a scheduler with bucketCount buckets, a rate limiter for restricting // the rate at which buckets are processed, and a function to invoke when items are scanned in // a bucket. -// TODO: remove DEBUG statements from this file once this logic has been adequately validated. func newScheduler(bucketCount int, bucketLimiter flowcontrol.RateLimiter, fn func(key, value interface{})) *scheduler { // Add one more bucket to serve as the "current" bucket bucketCount++ @@ -58,9 +58,11 @@ func (s *scheduler) RunUntil(ch <-chan struct{}) { func (s *scheduler) RunOnce() { key, value, last := s.next() if last { + klog.V(5).Infof("Image controller scheduler: waiting for limit") s.limiter.Accept() return } + klog.V(5).Infof("Image controller scheduler: handle %s", key) s.handle(key, value) } @@ -74,6 +76,7 @@ func (s *scheduler) at(inc int) int { func (s *scheduler) next() (interface{}, interface{}, bool) { s.mu.Lock() defer s.mu.Unlock() + klog.V(5).Infof("Image controller scheduler: queue (%d):\n %#v", s.position, s.buckets) last := s.buckets[s.position] // Grab the first item in the bucket, move it to the end and return it. @@ -84,6 +87,7 @@ func (s *scheduler) next() (interface{}, interface{}, bool) { } // The bucket was empty. Advance to the next bucket. s.position = s.at(1) + klog.V(5).Infof("Image controller scheduler: bucket was empty, advance to %d of %d buckets", s.position, len(s.buckets)) return nil, nil, true } @@ -116,6 +120,7 @@ func (s *scheduler) Add(key, value interface{}) { least = size } } + klog.V(5).Infof("Image controller scheduler: add %s to bucket %d", key, target) s.buckets[target][key] = value } @@ -126,13 +131,14 @@ func (s *scheduler) Remove(key, value interface{}) bool { defer s.mu.Unlock() match := true - for _, bucket := range s.buckets { + for i, bucket := range s.buckets { if value != nil { if old, ok := bucket[key]; ok && old != value { match = false continue } } + klog.V(5).Infof("Image controller scheduler: remove %s from bucket %d", key, i) delete(bucket, key) } return match