From 0c3dd77e3403bbc59398fa54c5000f759ed7bc12 Mon Sep 17 00:00:00 2001 From: Alexander Greene Date: Wed, 14 Jul 2021 13:28:46 -0700 Subject: [PATCH 1/2] Introduce continuous profiling This commit introduces a cronJob which extracts heap profiles from the OLM and Catalog Operator deployments through their exposed services. These heap profiles are then saved in configMaps in the openshift-operator-lifecycle-manager namespace. Requests against the aforementioned services are made over HTTPS. The client certificate used by the cronJob is recycled with each run. Co-authored-by: Vu Dinh Co-authored-by: Ben Luddy Signed-off-by: Alexander Greene --- Makefile | 14 +- cmd/collect-profiles/main.go | 371 ++++++++++++++++++ cmd/collect-profiles/profiling_test.go | 201 ++++++++++ go.mod | 2 + manifests/0000_50_olm_00-pprof-config.yaml | 13 + manifests/0000_50_olm_00-pprof-rbac.yaml | 44 +++ manifests/0000_50_olm_00-pprof-secret.yaml | 14 + ...00_50_olm_07-collect-profiles.cronjob.yaml | 49 +++ ...operator.deployment.ibm-cloud-managed.yaml | 7 +- ...000_50_olm_07-olm-operator.deployment.yaml | 7 +- ...operator.deployment.ibm-cloud-managed.yaml | 7 +- ...50_olm_08-catalog-operator.deployment.yaml | 7 +- operator-lifecycle-manager.Dockerfile | 1 + pkg/profiling/config/config.go | 58 +++ scripts/catalog-deployment.patch.yaml | 11 - scripts/generate_crds_manifests.sh | 132 +++++++ scripts/olm-deployment.patch.yaml | 11 - values.yaml | 4 +- vendor/modules.txt | 2 + 19 files changed, 901 insertions(+), 54 deletions(-) create mode 100644 cmd/collect-profiles/main.go create mode 100644 cmd/collect-profiles/profiling_test.go create mode 100644 manifests/0000_50_olm_00-pprof-config.yaml create mode 100644 manifests/0000_50_olm_00-pprof-rbac.yaml create mode 100644 manifests/0000_50_olm_00-pprof-secret.yaml create mode 100644 manifests/0000_50_olm_07-collect-profiles.cronjob.yaml create mode 100644 pkg/profiling/config/config.go diff --git a/Makefile b/Makefile index 
85af59acdb..6c81f9a8ef 100644 --- a/Makefile +++ b/Makefile @@ -22,12 +22,11 @@ OLM_PKG := $(GO_PKG)/operator-lifecycle-manager API_PKG := $(GO_PKG)/api ROOT_PKG := github.com/openshift/operator-framework-olm -PSM := $(addprefix bin/, psm) +COLLECT_PROFILES_CMD := $(addprefix bin/, collect-profiles) OPM := $(addprefix bin/, opm) OLM_CMDS := $(shell go list -mod=vendor $(OLM_PKG)/cmd/...) -PSM_CMDS := $(shell go list -mod=vendor github.com/openshift/operator-framework-olm/cmd/...) +PSM_CMD := $(addprefix bin/, psm) REGISTRY_CMDS := $(addprefix bin/, $(shell ls staging/operator-registry/cmd | grep -v opm)) - # Phony prerequisite for targets that rely on the go build cache to determine staleness. .PHONY: FORCE FORCE: @@ -56,7 +55,7 @@ build/registry: $(MAKE) $(REGISTRY_CMDS) $(OPM) build/olm: - $(MAKE) $(PSM_CMDS) $(OLM_CMDS) + $(MAKE) $(PSM_CMD) $(OLM_CMDS) $(COLLECT_PROFILES_CMD) $(OPM): version_flags=-ldflags "-X '$(REGISTRY_PKG)/cmd/opm/version.gitCommit=$(GIT_COMMIT)' -X '$(REGISTRY_PKG)/cmd/opm/version.opmVersion=$(OPM_VERSION)' -X '$(REGISTRY_PKG)/cmd/opm/version.buildDate=$(BUILD_DATE)'" $(OPM): @@ -70,8 +69,11 @@ $(OLM_CMDS): version_flags=-ldflags "-X $(OLM_PKG)/pkg/version.GitCommit=$(GIT_C $(OLM_CMDS): go build $(version_flags) $(GO_BUILD_OPTS) $(GO_BUILD_TAGS) -o bin/$(shell basename $@) $@ -$(PSM_CMDS): FORCE - go build $(GO_BUILD_OPTS) $(GO_BUILD_TAGS) -o $(PSM) $(ROOT_PKG)/cmd/... 
+$(PSM_CMD): FORCE + go build $(GO_BUILD_OPTS) $(GO_BUILD_TAGS) -o $(PSM_CMD) $(ROOT_PKG)/cmd/package-server-manager + +$(COLLECT_PROFILES_CMD): FORCE + go build $(GO_BUILD_OPTS) $(GO_BUILD_TAGS) -o $(COLLECT_PROFILES_CMD) $(ROOT_PKG)/cmd/collect-profiles .PHONY: cross cross: version_flags=-ldflags "-X '$(REGISTRY_PKG)/cmd/opm/version.gitCommit=$(GIT_COMMIT)' -X '$(REGISTRY_PKG)/cmd/opm/version.opmVersion=$(OPM_VERSION)' -X '$(REGISTRY_PKG)/cmd/opm/version.buildDate=$(BUILD_DATE)'" diff --git a/cmd/collect-profiles/main.go b/cmd/collect-profiles/main.go new file mode 100644 index 0000000000..a7e3a673bc --- /dev/null +++ b/cmd/collect-profiles/main.go @@ -0,0 +1,371 @@ +package main + +import ( + "bytes" + "context" + "crypto/rand" + "crypto/rsa" + "crypto/tls" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + "fmt" + "io" + "math/big" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" + "time" + + "github.com/sirupsen/logrus" + "github.com/spf13/cobra" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openshift/operator-framework-olm/pkg/profiling/config" +) + +const ( + profileConfigMapLabelKey = "olm.openshift.io/pprof" + olmNamespace = "openshift-operator-lifecycle-manager" + pprofSecretName = "pprof-cert" +) + +var ( + rootCmd = newCmd() + + // Used for flags + namespace string + configMountPath string + certMountPath string +) + +func init() { + rootCmd.PersistentFlags().StringVarP(&namespace, "namespace", "n", "default", "The Kubernetes namespace where the generated configMaps should exist. 
Defaults to \"default\".") + rootCmd.MarkFlagRequired("namespace") + rootCmd.PersistentFlags().StringVarP(&configMountPath, "config-mount-path", "c", "/etc/config", "The path to the collect-profiles configuration file.") + rootCmd.MarkFlagRequired("config-mount-path") + rootCmd.PersistentFlags().StringVarP(&certMountPath, "cert-mount-path", "", "/var/run/secrets/serving-cert", "The path to the tls cert used by the client making https requests against the pprof URLs.") +} + +func main() { + Execute() +} + +func Execute() { + if err := rootCmd.Execute(); err != nil { + logrus.Fatal(err) + os.Exit(1) + } +} + +func getTruePointer() *bool { + trueBool := true + return &trueBool +} + +func newCmd() *cobra.Command { + var cfg config.Configuration + return &cobra.Command{ + Use: "collect-profiles configMapName:url", + Short: "Retrieves the pprof data from a URL and stores it in a configMap.", + Long: `The collect-profiles command makes https requests against pprof URLs + provided as arguments and stores that information in immutable configMaps. 
+ + # Example command with multiple arguments + ./collect-profiles -n - openshift-operator-lifecycle-manager \ + - --config-mount-path \ + - /etc/config \ + - --cert-mount-path \ + - /var/run/secrets/serving-cert \ + - olm-operator-heap-:https://olm-operator-metrics:8443/debug/pprof/heap \ + - catalog-operator-heap-:https://catalog-operator-metrics:8443/debug/pprof/heap + `, + SilenceUsage: true, + PersistentPreRunE: func(*cobra.Command, []string) error { + return cfg.Load() + }, + RunE: func(cmd *cobra.Command, args []string) error { + + if len(args) == 0 { + logrus.Info("No arguments provided, exiting") + return nil + } + + jobConfig, err := config.GetConfig(configMountPath) + if err != nil { + logrus.Infof("error retrieving job config") + return err + } + + // Exit if job is disabled + if jobConfig.Disabled { + logrus.Infof("CronJob disabled, exiting") + return nil + } + + // Validate input + validatedArguments := make([]*argument, len(args)) + for i, arg := range args { + a, err := newArgument(arg) + if err != nil { + return err + } + validatedArguments[i] = a + } + + // Get existing configmaps + existingConfigMaps := &corev1.ConfigMapList{} + if err := cfg.Client.List(cmd.Context(), existingConfigMaps, client.InNamespace(namespace), client.HasLabels{profileConfigMapLabelKey}); err != nil { + return err + } + + newestConfigMaps, expiredConfigMaps := separateConfigMapsIntoNewestAndExpired(existingConfigMaps.Items) + + // Attempt to delete all but the newest configMaps generated by this job + errs := []error{} + for _, cm := range expiredConfigMaps { + if err := cfg.Client.Delete(cmd.Context(), &cm); err != nil { + errs = append(errs, err) + continue + } + logrus.Infof("Successfully deleted configMap %s/%s", cm.GetNamespace(), cm.GetName()) + } + + // If a delete call failed, abort to avoid creating new configMaps + if len(errs) != 0 { + return fmt.Errorf("error deleting expired pprof configMaps: %v", errs) + } + + certPath := filepath.Join(certMountPath, 
corev1.TLSCertKey) + keyPath := filepath.Join(certMountPath, corev1.TLSPrivateKeyKey) + + if err := verifyCertAndKeyExist(certPath, keyPath); err != nil { + logrus.Infof("error verifying provided cert and key: %v", err) + logrus.Info("generating a new cert and key") + return populateServingCert(cmd.Context(), cfg.Client) + } + + httpClient, err := getHttpClient(certPath, keyPath) + if err != nil { + return err + } + + // Track successfully created configMaps by generateName for each endpoint being scrapped. + createdCM := map[string]struct{}{} + + for _, a := range validatedArguments { + b, err := requestURLBody(httpClient, a.url) + if err != nil { + logrus.Infof("error retrieving pprof profile: %v", err) + continue + } + + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: a.generateName, + Namespace: namespace, + Labels: map[string]string{ + profileConfigMapLabelKey: "", + }, + }, + Immutable: getTruePointer(), + BinaryData: map[string][]byte{ + "profile.pb.gz": b, + }, + } + + if err := cfg.Client.Create(cmd.Context(), cm); err != nil { + logrus.Errorf("error created configMap %s/%s: %v", cm.GetNamespace(), cm.GetName(), err) + continue + } + + logrus.Infof("Successfully created configMap %s/%s", cm.GetNamespace(), cm.GetName()) + createdCM[a.generateName] = struct{}{} + } + + // Delete the configMaps which are no longer the newest + for _, cm := range newestConfigMaps { + // Don't delete ConfigMaps that were not replaced + // Also prevents deletes of configMaps with generateNames not included in command + if _, ok := createdCM[cm.GenerateName]; !ok { + continue + } + if err := cfg.Client.Delete(cmd.Context(), &cm); err != nil { + errs = append(errs, err) + continue + } + logrus.Infof("Successfully deleted configMap %s/%s", cm.GetNamespace(), cm.GetName()) + } + + if len(errs) != 0 { + return fmt.Errorf("error deleting existing pprof configMaps: %v", errs) + } + + // Update serving cert after a successful run + return 
populateServingCert(cmd.Context(), cfg.Client) + }, + } +} + +func verifyCertAndKeyExist(certPath, keyPath string) error { + fi, err := os.Stat(certPath) + if err != nil { + return err + } + if fi.Size() == 0 { + return fmt.Errorf("cert file should not be empty") + } + + fi, err = os.Stat(keyPath) + if err != nil { + return err + } + if fi.Size() == 0 { + return fmt.Errorf("key file should not be empty") + } + return nil +} + +func separateConfigMapsIntoNewestAndExpired(configMaps []corev1.ConfigMap) (newestCMs []corev1.ConfigMap, expiredCMs []corev1.ConfigMap) { + // Group ConfigMaps by GenerateName + newestConfigMaps := map[string]corev1.ConfigMap{} + for _, cm := range configMaps { + if _, ok := newestConfigMaps[cm.GenerateName]; !ok { + newestConfigMaps[cm.GenerateName] = cm + continue + } + if cm.CreationTimestamp.After(newestConfigMaps[cm.GenerateName].CreationTimestamp.Time) { + newestConfigMaps[cm.GenerateName], cm = cm, newestConfigMaps[cm.GenerateName] + } + expiredCMs = append(expiredCMs, cm) + } + + for _, v := range newestConfigMaps { + newestCMs = append(newestCMs, v) + } + + return newestCMs, expiredCMs +} + +type argument struct { + generateName string + url *url.URL +} + +func newArgument(s string) (*argument, error) { + splitStrings := strings.SplitN(s, ":", 2) + if len(splitStrings) != 2 { + return nil, fmt.Errorf("%s is an invalid argument, should match configMapName:url", s) + } + + url, err := url.Parse(splitStrings[1]) + if err != nil { + return nil, err + } + + if strings.ToLower(url.Scheme) != "https" { + return nil, fmt.Errorf("URL Scheme must be HTTPS") + } + + arg := &argument{ + generateName: splitStrings[0], + url: url, + } + + return arg, nil +} + +func getHttpClient(certPath, keyPath string) (*http.Client, error) { + cert, err := tls.LoadX509KeyPair(certPath, keyPath) + if err != nil { + return nil, err + } + return &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, + Certificates: 
[]tls.Certificate{cert}, + }, + }, + }, nil +} + +func requestURLBody(httpClient *http.Client, u *url.URL) ([]byte, error) { + response, err := httpClient.Do(&http.Request{ + Method: http.MethodGet, + URL: u, + }) + if err != nil { + return nil, err + } + + if response.StatusCode != http.StatusOK { + return nil, fmt.Errorf("%s responded with %d status code instead of %d", u, response.StatusCode, http.StatusOK) + } + + var b bytes.Buffer + if _, err := io.Copy(&b, response.Body); err != nil { + return nil, fmt.Errorf("error reading response body: %v", err) + } + + return b.Bytes(), nil +} + +func populateServingCert(ctx context.Context, client client.Client) error { + secret := &corev1.Secret{} + err := client.Get(ctx, types.NamespacedName{Namespace: olmNamespace, Name: pprofSecretName}, secret) + if err != nil { + return err + } + + cert, privateKey, err := getCertAndKey() + if err != nil { + return err + } + + secret.Data[corev1.TLSCertKey] = cert + secret.Data[corev1.TLSPrivateKeyKey] = privateKey + return client.Update(ctx, secret) +} + +func getCertAndKey() ([]byte, []byte, error) { + cert := &x509.Certificate{ + SerialNumber: big.NewInt(1658), + Subject: pkix.Name{ + Organization: []string{"Red Hat, Inc."}, + }, + NotBefore: time.Now(), + NotAfter: time.Now().Add(time.Hour), + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth}, + KeyUsage: x509.KeyUsageDigitalSignature, + } + + caPrivKey, err := rsa.GenerateKey(rand.Reader, 4096) + if err != nil { + return nil, nil, err + } + + caBytes, err := x509.CreateCertificate(rand.Reader, cert, cert, &caPrivKey.PublicKey, caPrivKey) + if err != nil { + return nil, nil, err + } + + caPEM := new(bytes.Buffer) + pem.Encode(caPEM, &pem.Block{ + Type: "CERTIFICATE", + Bytes: caBytes, + }) + + caPrivKeyPEM := new(bytes.Buffer) + pem.Encode(caPrivKeyPEM, &pem.Block{ + Type: "RSA PRIVATE KEY", + Bytes: x509.MarshalPKCS1PrivateKey(caPrivKey), + }) + + return caPEM.Bytes(), caPrivKeyPEM.Bytes(), nil +} diff --git 
a/cmd/collect-profiles/profiling_test.go b/cmd/collect-profiles/profiling_test.go new file mode 100644 index 0000000000..8efa7cf9bf --- /dev/null +++ b/cmd/collect-profiles/profiling_test.go @@ -0,0 +1,201 @@ +package main + +import ( + "reflect" + "testing" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestSeparateConfigMapsIntoNewestAndExpired(t *testing.T) { + nowTime := metav1.Now() + futureTime := metav1.NewTime(nowTime.Add(time.Hour)) + var tests = []struct { + name string + arg, wantNewest, wantExpired []corev1.ConfigMap + }{ + { + name: "empty", + arg: []corev1.ConfigMap{}, + wantNewest: []corev1.ConfigMap{}, + wantExpired: []corev1.ConfigMap{}, + }, + { + name: "single newest configMap no expired configMaps", + arg: []corev1.ConfigMap{ + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "foo-", + CreationTimestamp: futureTime, + }, + }, + }, + wantNewest: []corev1.ConfigMap{ + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "foo-", + CreationTimestamp: futureTime, + }, + }, + }, + wantExpired: []corev1.ConfigMap{}, + }, + { + name: "multiple newest configMap no expired configMaps", + arg: []corev1.ConfigMap{ + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "foo-", + CreationTimestamp: futureTime, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "bar-", + CreationTimestamp: futureTime, + }, + }, + }, + wantNewest: []corev1.ConfigMap{ + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "foo-", + CreationTimestamp: futureTime, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "bar-", + CreationTimestamp: futureTime, + }, + }, + }, + wantExpired: []corev1.ConfigMap{}, + }, + { + name: "single newest configMap one expired configMap", + arg: []corev1.ConfigMap{ + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "foo-", + CreationTimestamp: futureTime, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "foo-", + CreationTimestamp: nowTime, + }, + }, + }, + 
wantNewest: []corev1.ConfigMap{ + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "foo-", + CreationTimestamp: futureTime, + }, + }, + }, + wantExpired: []corev1.ConfigMap{ + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "foo-", + CreationTimestamp: nowTime, + }, + }, + }, + }, + { + name: "multiple newest configMaps and multiple expired configMaps", + arg: []corev1.ConfigMap{ + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "foo-", + CreationTimestamp: futureTime, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "foo-", + CreationTimestamp: nowTime, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "bar-", + CreationTimestamp: futureTime, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "bar-", + CreationTimestamp: nowTime, + }, + }, + }, + wantNewest: []corev1.ConfigMap{ + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "foo-", + CreationTimestamp: futureTime, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "bar-", + CreationTimestamp: futureTime, + }, + }, + }, + wantExpired: []corev1.ConfigMap{ + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "foo-", + CreationTimestamp: nowTime, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "bar-", + CreationTimestamp: nowTime, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotNewest, gotExpired := separateConfigMapsIntoNewestAndExpired(tt.arg) + + if !equal(gotNewest, tt.wantNewest) { + t.Errorf("separateConfigMapsIntoNewestAndExpired contains unexpected newest CMs. Got %v, want %v", gotNewest, tt.wantNewest) + } + + if !equal(gotExpired, tt.wantExpired) { + t.Errorf("separateConfigMapsIntoNewestAndExpired contains unexpected expired CMs. 
Got %v, want %v", gotExpired, tt.wantExpired) + } + + }) + } +} + +func equal(a, b []corev1.ConfigMap) bool { + if len(a) != len(b) { + return false + } + for i := range a { + found := false + for j := range b { + if reflect.DeepEqual(a[i], b[j]) { + found = true + break + } + } + if !found { + return false + } + } + return true +} diff --git a/go.mod b/go.mod index 281469c1e4..5dc4ab812f 100644 --- a/go.mod +++ b/go.mod @@ -15,10 +15,12 @@ require ( github.com/operator-framework/api v0.10.1 github.com/operator-framework/operator-lifecycle-manager v0.0.0-00010101000000-000000000000 github.com/operator-framework/operator-registry v1.17.5 + github.com/sirupsen/logrus v1.8.1 github.com/spf13/cobra v1.1.3 github.com/stretchr/testify v1.7.0 google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0 google.golang.org/protobuf v1.27.1 + gopkg.in/yaml.v2 v2.4.0 helm.sh/helm/v3 v3.6.2 k8s.io/api v0.22.0-beta.0 k8s.io/apimachinery v0.22.0-beta.0 diff --git a/manifests/0000_50_olm_00-pprof-config.yaml b/manifests/0000_50_olm_00-pprof-config.yaml new file mode 100644 index 0000000000..ebf2a73ce6 --- /dev/null +++ b/manifests/0000_50_olm_00-pprof-config.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + annotations: + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + release.openshift.io/create-only: "true" + name: collect-profiles-config + namespace: openshift-operator-lifecycle-manager +data: + pprof-config.yaml: | + disabled: False diff --git a/manifests/0000_50_olm_00-pprof-rbac.yaml b/manifests/0000_50_olm_00-pprof-rbac.yaml new file mode 100644 index 0000000000..35e09b6f3b --- /dev/null +++ b/manifests/0000_50_olm_00-pprof-rbac.yaml @@ -0,0 +1,44 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + annotations: + include.release.openshift.io/ibm-cloud-managed: "true" + 
include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + name: collect-profiles + namespace: openshift-operator-lifecycle-manager +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "create", "delete"] + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + annotations: + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + name: collect-profiles + namespace: openshift-operator-lifecycle-manager +subjects: + - kind: ServiceAccount + name: collect-profiles + namespace: openshift-operator-lifecycle-manager +roleRef: + kind: Role + name: collect-profiles + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + name: collect-profiles + namespace: openshift-operator-lifecycle-manager diff --git a/manifests/0000_50_olm_00-pprof-secret.yaml b/manifests/0000_50_olm_00-pprof-secret.yaml new file mode 100644 index 0000000000..75f211d7ed --- /dev/null +++ b/manifests/0000_50_olm_00-pprof-secret.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Secret +metadata: + annotations: + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + release.openshift.io/create-only: "true" + name: pprof-cert + namespace: openshift-operator-lifecycle-manager +type: kubernetes.io/tls +data: + tls.crt: "" + tls.key: "" diff --git a/manifests/0000_50_olm_07-collect-profiles.cronjob.yaml 
b/manifests/0000_50_olm_07-collect-profiles.cronjob.yaml new file mode 100644 index 0000000000..29b99f87f7 --- /dev/null +++ b/manifests/0000_50_olm_07-collect-profiles.cronjob.yaml @@ -0,0 +1,49 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + annotations: + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + name: collect-profiles + namespace: openshift-operator-lifecycle-manager +spec: + schedule: "*/15 * * * *" + jobTemplate: + spec: + template: + spec: + serviceAccountName: collect-profiles + priorityClassName: openshift-user-critical + containers: + - name: collect-profiles + image: quay.io/operator-framework/olm@sha256:de396b540b82219812061d0d753440d5655250c621c753ed1dc67d6154741607 + imagePullPolicy: IfNotPresent + command: + - bin/collect-profiles + args: + - -n + - openshift-operator-lifecycle-manager + - --config-mount-path + - /etc/config + - --cert-mount-path + - /var/run/secrets/serving-cert + - olm-operator-heap-:https://olm-operator-metrics:8443/debug/pprof/heap + - catalog-operator-heap-:https://catalog-operator-metrics:8443/debug/pprof/heap + volumeMounts: + - mountPath: /etc/config + name: config-volume + - mountPath: /var/run/secrets/serving-cert + name: secret-volume + resources: + requests: + cpu: 10m + memory: 80Mi + volumes: + - name: config-volume + configMap: + name: collect-profiles-config + - name: secret-volume + secret: + secretName: pprof-cert + restartPolicy: Never diff --git a/manifests/0000_50_olm_07-olm-operator.deployment.ibm-cloud-managed.yaml b/manifests/0000_50_olm_07-olm-operator.deployment.ibm-cloud-managed.yaml index 1a62fcd2d5..203389beeb 100644 --- a/manifests/0000_50_olm_07-olm-operator.deployment.ibm-cloud-managed.yaml +++ b/manifests/0000_50_olm_07-olm-operator.deployment.ibm-cloud-managed.yaml @@ -28,10 +28,7 @@ spec: secretName: olm-operator-serving-cert - name: 
profile-collector-cert secret: - secretName: olm-operator-serving-cert - - name: serving-cert - secret: - secretName: olm-operator-serving-cert + secretName: pprof-cert containers: - name: olm-operator volumeMounts: @@ -41,8 +38,6 @@ spec: - name: profile-collector-cert mountPath: "/profile-collector-cert" readOnly: true - - mountPath: /var/run/secrets/serving-cert - name: serving-cert command: - /bin/olm args: diff --git a/manifests/0000_50_olm_07-olm-operator.deployment.yaml b/manifests/0000_50_olm_07-olm-operator.deployment.yaml index 1a23661fae..b699206559 100644 --- a/manifests/0000_50_olm_07-olm-operator.deployment.yaml +++ b/manifests/0000_50_olm_07-olm-operator.deployment.yaml @@ -29,10 +29,7 @@ spec: secretName: olm-operator-serving-cert - name: profile-collector-cert secret: - secretName: olm-operator-serving-cert - - name: serving-cert - secret: - secretName: olm-operator-serving-cert + secretName: pprof-cert containers: - name: olm-operator volumeMounts: @@ -42,8 +39,6 @@ spec: - name: profile-collector-cert mountPath: "/profile-collector-cert" readOnly: true - - mountPath: /var/run/secrets/serving-cert - name: serving-cert command: - /bin/olm args: diff --git a/manifests/0000_50_olm_08-catalog-operator.deployment.ibm-cloud-managed.yaml b/manifests/0000_50_olm_08-catalog-operator.deployment.ibm-cloud-managed.yaml index f5da0de00e..7ade5acae9 100644 --- a/manifests/0000_50_olm_08-catalog-operator.deployment.ibm-cloud-managed.yaml +++ b/manifests/0000_50_olm_08-catalog-operator.deployment.ibm-cloud-managed.yaml @@ -28,10 +28,7 @@ spec: secretName: catalog-operator-serving-cert - name: profile-collector-cert secret: - secretName: catalog-operator-serving-cert - - name: serving-cert - secret: - secretName: catalog-operator-serving-cert + secretName: pprof-cert containers: - name: catalog-operator volumeMounts: @@ -41,8 +38,6 @@ spec: - name: profile-collector-cert mountPath: "/profile-collector-cert" readOnly: true - - mountPath: 
/var/run/secrets/serving-cert - name: serving-cert command: - /bin/catalog args: diff --git a/manifests/0000_50_olm_08-catalog-operator.deployment.yaml b/manifests/0000_50_olm_08-catalog-operator.deployment.yaml index fde9d56591..07557ef036 100644 --- a/manifests/0000_50_olm_08-catalog-operator.deployment.yaml +++ b/manifests/0000_50_olm_08-catalog-operator.deployment.yaml @@ -29,10 +29,7 @@ spec: secretName: catalog-operator-serving-cert - name: profile-collector-cert secret: - secretName: catalog-operator-serving-cert - - name: serving-cert - secret: - secretName: catalog-operator-serving-cert + secretName: pprof-cert containers: - name: catalog-operator volumeMounts: @@ -42,8 +39,6 @@ spec: - name: profile-collector-cert mountPath: "/profile-collector-cert" readOnly: true - - mountPath: /var/run/secrets/serving-cert - name: serving-cert command: - /bin/catalog args: diff --git a/operator-lifecycle-manager.Dockerfile b/operator-lifecycle-manager.Dockerfile index 3264ba01a1..7d511222a6 100644 --- a/operator-lifecycle-manager.Dockerfile +++ b/operator-lifecycle-manager.Dockerfile @@ -22,6 +22,7 @@ LABEL io.openshift.release.operator=true # Copy the binary to a standard location where it will run. 
COPY --from=builder /build/bin/olm /bin/olm COPY --from=builder /build/bin/catalog /bin/catalog +COPY --from=builder /build/bin/collect-profiles /bin/collect-profiles COPY --from=builder /build/bin/package-server /bin/package-server COPY --from=builder /build/bin/cpb /bin/cpb COPY --from=builder /build/bin/psm /bin/psm diff --git a/pkg/profiling/config/config.go b/pkg/profiling/config/config.go new file mode 100644 index 0000000000..e7111ea12f --- /dev/null +++ b/pkg/profiling/config/config.go @@ -0,0 +1,58 @@ +package config + +import ( + "io/ioutil" + "path/filepath" + + "gopkg.in/yaml.v2" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type Configuration struct { + RESTConfig *rest.Config + Client client.Client + Scheme *runtime.Scheme +} + +func (c *Configuration) Load() error { + // creates the in-cluster config + restConfig, err := rest.InClusterConfig() + if err != nil { + return err + } + + sch := scheme.Scheme + cl, err := client.New(restConfig, client.Options{ + Scheme: sch, + }) + if err != nil { + return err + } + + c.Scheme = scheme.Scheme + c.Client = cl + c.RESTConfig = restConfig + + return nil +} + +type config struct { + Disabled bool `yaml:"disabled"` +} + +func GetConfig(path string) (*config, error) { + data, err := ioutil.ReadFile(filepath.Join(path, "pprof-config.yaml")) + if err != nil { + return nil, err + } + + cfg := &config{} + if err := yaml.Unmarshal(data, &cfg); err != nil { + return nil, err + } + + return cfg, nil +} diff --git a/scripts/catalog-deployment.patch.yaml b/scripts/catalog-deployment.patch.yaml index 3f026d4144..dbf206cfe4 100644 --- a/scripts/catalog-deployment.patch.yaml +++ b/scripts/catalog-deployment.patch.yaml @@ -9,14 +9,3 @@ value: name: RELEASE_VERSION value: "0.0.1-snapshot" -- command: update - path: spec.template.spec.containers[0].volumeMounts[+] - value: - mountPath: /var/run/secrets/serving-cert - name: 
serving-cert -- command: update - path: spec.template.spec.volumes[+] - value: - name: serving-cert - secret: - secretName: catalog-operator-serving-cert diff --git a/scripts/generate_crds_manifests.sh b/scripts/generate_crds_manifests.sh index 986e494657..ad618dbb10 100755 --- a/scripts/generate_crds_manifests.sh +++ b/scripts/generate_crds_manifests.sh @@ -162,6 +162,138 @@ spec: tolerationSeconds: 120 EOF +cat << EOF > manifests/0000_50_olm_00-pprof-config.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + annotations: + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + release.openshift.io/create-only: "true" + name: collect-profiles-config + namespace: openshift-operator-lifecycle-manager +data: + pprof-config.yaml: | + disabled: False +EOF + +cat << EOF > manifests/0000_50_olm_00-pprof-rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + annotations: + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + name: collect-profiles + namespace: openshift-operator-lifecycle-manager +rules: +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "create", "delete"] +- apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + annotations: + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + name: collect-profiles + namespace: openshift-operator-lifecycle-manager +subjects: +- kind: ServiceAccount + name: collect-profiles + namespace: openshift-operator-lifecycle-manager +roleRef: + kind: Role + name: collect-profiles + apiGroup: 
rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + name: collect-profiles + namespace: openshift-operator-lifecycle-manager +EOF + +cat << EOF > manifests/0000_50_olm_00-pprof-secret.yaml +apiVersion: v1 +kind: Secret +metadata: + annotations: + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + release.openshift.io/create-only: "true" + name: pprof-cert + namespace: openshift-operator-lifecycle-manager +type: kubernetes.io/tls +data: + tls.crt: "" + tls.key: "" +EOF + +cat << EOF > manifests/0000_50_olm_07-collect-profiles.cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + annotations: + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + name: collect-profiles + namespace: openshift-operator-lifecycle-manager +spec: + schedule: "*/15 * * * *" + jobTemplate: + spec: + template: + spec: + serviceAccountName: collect-profiles + priorityClassName: openshift-user-critical + containers: + - name: collect-profiles + image: quay.io/operator-framework/olm@sha256:de396b540b82219812061d0d753440d5655250c621c753ed1dc67d6154741607 + imagePullPolicy: IfNotPresent + command: + - bin/collect-profiles + args: + - -n + - openshift-operator-lifecycle-manager + - --config-mount-path + - /etc/config + - --cert-mount-path + - /var/run/secrets/serving-cert + - olm-operator-heap-:https://olm-operator-metrics:8443/debug/pprof/heap + - catalog-operator-heap-:https://catalog-operator-metrics:8443/debug/pprof/heap + volumeMounts: + - mountPath: /etc/config + name: config-volume + - 
mountPath: /var/run/secrets/serving-cert + name: secret-volume + resources: + requests: + cpu: 10m + memory: 80Mi + volumes: + - name: config-volume + configMap: + name: collect-profiles-config + - name: secret-volume + secret: + secretName: pprof-cert + restartPolicy: Never +EOF + add_ibm_managed_cloud_annotations "${ROOT_DIR}/manifests" # requires gnu sed if on mac diff --git a/scripts/olm-deployment.patch.yaml b/scripts/olm-deployment.patch.yaml index 1da81a0332..dbf206cfe4 100644 --- a/scripts/olm-deployment.patch.yaml +++ b/scripts/olm-deployment.patch.yaml @@ -9,14 +9,3 @@ value: name: RELEASE_VERSION value: "0.0.1-snapshot" -- command: update - path: spec.template.spec.containers[0].volumeMounts[+] - value: - mountPath: /var/run/secrets/serving-cert - name: serving-cert -- command: update - path: spec.template.spec.volumes[+] - value: - name: serving-cert - secret: - secretName: olm-operator-serving-cert diff --git a/values.yaml b/values.yaml index 7b476635ba..15da97f70b 100644 --- a/values.yaml +++ b/values.yaml @@ -15,7 +15,7 @@ olm: service: internalPort: 8443 externalPort: 8443 - clientCASecret: olm-operator-serving-cert + clientCASecret: pprof-cert nodeSelector: kubernetes.io/os: linux node-role.kubernetes.io/master: "" @@ -45,7 +45,7 @@ catalog: service: internalPort: 8443 externalPort: 8443 - clientCASecret: catalog-operator-serving-cert + clientCASecret: pprof-cert tlsSecret: catalog-operator-serving-cert nodeSelector: kubernetes.io/os: linux diff --git a/vendor/modules.txt b/vendor/modules.txt index dab38fd19b..13672e1f0a 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -658,6 +658,7 @@ github.com/shopspring/decimal # github.com/shurcooL/sanitized_anchor_name v1.0.0 github.com/shurcooL/sanitized_anchor_name # github.com/sirupsen/logrus v1.8.1 +## explicit github.com/sirupsen/logrus # github.com/spf13/cast v1.3.1 github.com/spf13/cast @@ -966,6 +967,7 @@ gopkg.in/tomb.v1 # gopkg.in/warnings.v0 v0.1.2 gopkg.in/warnings.v0 # gopkg.in/yaml.v2 
v2.4.0 +## explicit gopkg.in/yaml.v2 # gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b gopkg.in/yaml.v3 From a21216621afb871af9f332066f3583228595368a Mon Sep 17 00:00:00 2001 From: Alexander Greene Date: Thu, 22 Jul 2021 11:55:50 -0400 Subject: [PATCH 2/2] fix psm deployment --- ...0_50_olm-06-psm-operator.deployment.ibm-cloud-managed.yaml | 4 ++-- manifests/0000_50_olm-06-psm-operator.deployment.yaml | 4 ++-- scripts/generate_crds_manifests.sh | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/manifests/0000_50_olm-06-psm-operator.deployment.ibm-cloud-managed.yaml b/manifests/0000_50_olm-06-psm-operator.deployment.ibm-cloud-managed.yaml index 9b5b013dca..a4526942ee 100644 --- a/manifests/0000_50_olm-06-psm-operator.deployment.ibm-cloud-managed.yaml +++ b/manifests/0000_50_olm-06-psm-operator.deployment.ibm-cloud-managed.yaml @@ -56,12 +56,12 @@ spec: httpGet: path: /healthz port: 8080 - initialDelaySeconds: 30 + initialDelaySeconds: 30 readinessProbe: httpGet: path: /healthz port: 8080 - initialDelaySeconds: 30 + initialDelaySeconds: 30 terminationMessagePolicy: FallbackToLogsOnError nodeSelector: kubernetes.io/os: linux diff --git a/manifests/0000_50_olm-06-psm-operator.deployment.yaml b/manifests/0000_50_olm-06-psm-operator.deployment.yaml index 3a45463821..1ba777e57e 100644 --- a/manifests/0000_50_olm-06-psm-operator.deployment.yaml +++ b/manifests/0000_50_olm-06-psm-operator.deployment.yaml @@ -55,12 +55,12 @@ spec: httpGet: path: /healthz port: 8080 - initialDelaySeconds: 30 + initialDelaySeconds: 30 readinessProbe: httpGet: path: /healthz port: 8080 - initialDelaySeconds: 30 + initialDelaySeconds: 30 terminationMessagePolicy: FallbackToLogsOnError nodeSelector: kubernetes.io/os: linux diff --git a/scripts/generate_crds_manifests.sh b/scripts/generate_crds_manifests.sh index ad618dbb10..b9f72ce04f 100755 --- a/scripts/generate_crds_manifests.sh +++ b/scripts/generate_crds_manifests.sh @@ -138,12 +138,12 @@ spec: httpGet: path: /healthz 
port: 8080 - initialDelaySeconds: 30 + initialDelaySeconds: 30 readinessProbe: httpGet: path: /healthz port: 8080 - initialDelaySeconds: 30 + initialDelaySeconds: 30 terminationMessagePolicy: FallbackToLogsOnError nodeSelector: kubernetes.io/os: linux