diff --git a/examples/simple/deploy/01_simple-csi-driver.yaml b/examples/simple/deploy/01_simple-csi-driver.yaml index 4fca9b8..4b35006 100644 --- a/examples/simple/deploy/01_simple-csi-driver.yaml +++ b/examples/simple/deploy/01_simple-csi-driver.yaml @@ -59,7 +59,7 @@ spec: allowPrivilegeEscalation: false capabilities: { drop: [ "ALL" ] } readOnlyRootFilesystem: true - image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.13.0 + image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.14.0 args: - --v=5 - --csi-address=/plugin/csi.sock @@ -94,6 +94,10 @@ spec: fieldPath: spec.nodeName - name: CSI_ENDPOINT value: unix://plugin/csi.sock + ports: + - containerPort: 9402 + name: http-metrics + protocol: TCP volumeMounts: - name: plugin-dir mountPath: /plugin diff --git a/examples/simple/deploy/02_example-app.yaml b/examples/simple/deploy/02_example-app.yaml index a63ea12..fdcd203 100644 --- a/examples/simple/deploy/02_example-app.yaml +++ b/examples/simple/deploy/02_example-app.yaml @@ -48,7 +48,7 @@ spec: runAsUser: 2000 containers: - name: my-frontend - image: busybox:1.35.0 + image: busybox:1.36.1 volumeMounts: - mountPath: "/tls" name: tls diff --git a/examples/simple/go.mod b/examples/simple/go.mod index 249782e..ce5dbc7 100644 --- a/examples/simple/go.mod +++ b/examples/simple/go.mod @@ -7,6 +7,8 @@ replace github.com/cert-manager/csi-lib => ../../ require ( github.com/cert-manager/cert-manager v1.19.1 github.com/cert-manager/csi-lib v0.0.0-00010101000000-000000000000 + github.com/prometheus/client_golang v1.23.2 + golang.org/x/sync v0.17.0 k8s.io/client-go v0.34.2 k8s.io/klog/v2 v2.130.1 k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 @@ -43,7 +45,6 @@ require ( github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_golang v1.23.2 // 
indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.66.1 // indirect github.com/prometheus/procfs v0.17.0 // indirect diff --git a/examples/simple/go.sum b/examples/simple/go.sum index c01f33c..06abe81 100644 --- a/examples/simple/go.sum +++ b/examples/simple/go.sum @@ -171,6 +171,8 @@ golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwE golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/examples/simple/main.go b/examples/simple/main.go index c62d763..24d672f 100644 --- a/examples/simple/main.go +++ b/examples/simple/main.go @@ -28,6 +28,7 @@ import ( "flag" "fmt" "net" + "net/http" "net/url" "strings" "time" @@ -35,13 +36,17 @@ import ( cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" cmclient "github.com/cert-manager/cert-manager/pkg/client/clientset/versioned" + "github.com/cert-manager/cert-manager/pkg/client/informers/externalversions" "github.com/cert-manager/cert-manager/pkg/util/pki" "github.com/cert-manager/csi-lib/driver" "github.com/cert-manager/csi-lib/manager" "github.com/cert-manager/csi-lib/metadata" + "github.com/cert-manager/csi-lib/metrics" 
"github.com/cert-manager/csi-lib/storage" + "github.com/prometheus/client_golang/prometheus" + "golang.org/x/sync/errgroup" "k8s.io/client-go/rest" - "k8s.io/klog/v2/klogr" + "k8s.io/klog/v2" "k8s.io/utils/clock" ) @@ -89,7 +94,7 @@ func main() { panic("-data-root must be set") } - log := klogr.New() + log := klog.TODO() restConfig, err := rest.InClusterConfig() if err != nil { @@ -103,13 +108,29 @@ func main() { store.FSGroupVolumeAttributeKey = FsGroupKey - d, err := driver.New(context.Background(), *endpoint, log, driver.Options{ + cmClient := cmclient.NewForConfigOrDie(restConfig) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + certRequestInformerFactory := externalversions.NewSharedInformerFactory(cmClient, 5*time.Second) + certRequestInformer := certRequestInformerFactory.Certmanager().V1().CertificateRequests() + metricsHandler := metrics.New(*nodeID, &log, prometheus.NewRegistry(), store, certRequestInformer.Lister()) + + go func() { + err := startMetricsServer(ctx, metricsHandler, certRequestInformerFactory) + if err != nil { + panic("failed to setup metrics server: " + err.Error()) + } + }() + + d, err := driver.New(ctx, *endpoint, log, driver.Options{ DriverName: "csi.cert-manager.io", DriverVersion: "v0.0.1", NodeID: *nodeID, Store: store, Manager: manager.NewManagerOrDie(manager.Options{ - Client: cmclient.NewForConfigOrDie(restConfig), + Client: cmClient, MetadataReader: store, Clock: clock.RealClock{}, Log: &log, @@ -118,6 +139,7 @@ func main() { GenerateRequest: generateRequest, SignRequest: signRequest, WriteKeypair: (&writer{store: store}).writeKeypair, + Metrics: metricsHandler, }), }) if err != nil { @@ -350,3 +372,49 @@ func keyUsagesFromAttributes(usagesCSV string) []cmapi.KeyUsage { return keyUsages } + +// startMetricsServer starts a server listening on port 9402, until the supplied context is cancelled, +// after which the server will gracefully shutdown (within 5 seconds). 
+func startMetricsServer(
+	rootCtx context.Context,
+	metricsHandler *metrics.Metrics,
+	certRequestInformerFactory externalversions.SharedInformerFactory,
+) error {
+	g, ctx := errgroup.WithContext(rootCtx) // ctx ends with rootCtx or on the first goroutine error
+
+	listenConfig := &net.ListenConfig{}
+	metricsLn, err := listenConfig.Listen(ctx, "tcp", ":9402")
+	if err != nil {
+		return err
+	}
+	metricsServer := &http.Server{
+		Addr:           metricsLn.Addr().String(),
+		ReadTimeout:    8 * time.Second,
+		WriteTimeout:   8 * time.Second,
+		MaxHeaderBytes: 1 << 20, // 1 MiB
+		Handler:        metricsHandler.DefaultHandler(),
+	}
+
+	g.Go(func() error {
+		certRequestInformerFactory.Start(ctx.Done())
+		certRequestInformerFactory.WaitForCacheSync(ctx.Done())
+		return nil
+	})
+	g.Go(func() error {
+		<-ctx.Done() // also fires when Serve fails, so g.Wait() cannot hang until rootCtx ends
+		// ctx is already cancelled here; a fresh context gives Shutdown its real 5s grace period
+		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+		defer cancel()
+
+		// nolint: contextcheck
+		return metricsServer.Shutdown(shutdownCtx)
+	})
+	g.Go(func() error {
+		// Serve returns http.ErrServerClosed after Shutdown; that is the normal exit, not a failure
+		if err := metricsServer.Serve(metricsLn); err != http.ErrServerClosed {
+			return err
+		}
+		return nil
+	})
+	return g.Wait()
+}
diff --git a/go.mod b/go.mod index d196437..9a99c9a 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/container-storage-interface/spec v1.12.0 github.com/go-logr/logr v1.4.3 github.com/kubernetes-csi/csi-lib-utils v0.23.0 + github.com/prometheus/client_golang v1.23.2 github.com/stretchr/testify v1.11.1 google.golang.org/grpc v1.77.0 k8s.io/apimachinery v0.34.2 @@ -17,12 +18,15 @@ require ( ) require ( + github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.13.0 // indirect github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-asn1-ber/asn1-ber
v1.5.8-0.20250403174932-29230038a667 // indirect + github.com/go-ldap/ldap/v3 v3.4.12 // indirect github.com/go-logr/zapr v1.3.0 // indirect github.com/go-openapi/jsonpointer v0.22.1 // indirect github.com/go-openapi/jsonreference v0.21.2 // indirect @@ -35,13 +39,13 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect github.com/mailru/easyjson v0.9.0 // indirect github.com/moby/sys/mountinfo v0.7.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_golang v1.23.2 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.66.1 // indirect github.com/prometheus/procfs v0.17.0 // indirect @@ -54,6 +58,7 @@ require ( go.uber.org/zap v1.27.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/crypto v0.43.0 // indirect golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 // indirect golang.org/x/oauth2 v0.32.0 // indirect golang.org/x/sys v0.37.0 // indirect diff --git a/go.sum b/go.sum index 988dea6..c01f33c 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,7 @@ +github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= +github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= +github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e h1:4dAU9FXIyQktpoUAgOJK3OTFc/xug0PCXYCqU0FgDKI= +github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= 
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= @@ -17,6 +21,10 @@ github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bF github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 h1:BP4M0CvQ4S3TGls2FvczZtj5Re/2ZzkV9VwqPHH/3Bo= +github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= +github.com/go-ldap/ldap/v3 v3.4.12 h1:1b81mv7MagXZ7+1r7cLTWmyuTqVqdwbtJSjC0DAp9s4= +github.com/go-ldap/ldap/v3 v3.4.12/go.mod h1:+SPAGcTtOfmGsCb3h1RFiq4xpp4N636G75OEace8lNo= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -46,8 +54,22 @@ github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgY github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= +github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod 
h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= +github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= +github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= +github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= +github.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg= +github.com/jcmturner/gofork v1.7.6/go.mod h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo= +github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= +github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= +github.com/jcmturner/gokrb5/v8 v8.4.4 h1:x1Sv4HaTpepFkXbt2IkL29DXRf8sOfZXo8eRKh687T8= +github.com/jcmturner/gokrb5/v8 v8.4.4/go.mod h1:1btQEpgT6k+unzCwX1KdWMEwPPkkgBtP+F6aCACiMrs= +github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= +github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -134,6 +156,8 @@ go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= +golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= golang.org/x/mod 
v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= diff --git a/manager/manager.go b/manager/manager.go index 49ff7a6..4887803 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -47,6 +47,7 @@ import ( internalapi "github.com/cert-manager/csi-lib/internal/api" internalapiutil "github.com/cert-manager/csi-lib/internal/api/util" "github.com/cert-manager/csi-lib/metadata" + "github.com/cert-manager/csi-lib/metrics" "github.com/cert-manager/csi-lib/storage" ) @@ -89,6 +90,9 @@ type Options struct { // RenewalBackoffConfig configures the exponential backoff applied to certificate renewal failures. RenewalBackoffConfig *wait.Backoff + + // Metrics is used for exposing Prometheus metrics + Metrics *metrics.Metrics } // NewManager constructs a new manager used to manage volumes containing @@ -241,6 +245,7 @@ func NewManager(opts Options) (*Manager, error) { metadataReader: opts.MetadataReader, clock: opts.Clock, log: *opts.Log, + metrics: opts.Metrics, generatePrivateKey: opts.GeneratePrivateKey, generateRequest: opts.GenerateRequest, @@ -368,25 +373,20 @@ type Manager struct { // Defaults to uuid.NewUUID() from k8s.io/apimachinery/pkg/util/uuid. requestNameGenerator func() string - // doNotUse_CallOnEachIssue is a field used SOLELY for testing, and cannot be configured by external package consumers. - // It is used to perform some action (e.g. counting) each time issue() is called. - // It will be removed as soon as we have actual metrics support in csi-lib, which will allow us to measure - // things like the number of times issue() is called. - // No thread safety is added around this field, and it MUST NOT be used for any implementation logic. - // It should not be used full-stop :). 
- doNotUse_CallOnEachIssue func() + // metrics is used for Prometheus metrics collection + metrics *metrics.Metrics } // issue will step through the entire issuance flow for a volume. func (m *Manager) issue(ctx context.Context, volumeID string) error { - // TODO: remove this code and replace with actual metrics support - if m.doNotUse_CallOnEachIssue != nil { - m.doNotUse_CallOnEachIssue() - } - log := m.log.WithValues("volume_id", volumeID) log.Info("Processing issuance") + // Increase issue count + if m.metrics != nil { + m.metrics.IncrementIssueCallCountTotal(m.nodeNameHash, volumeID) + } + if err := m.cleanupStaleRequests(ctx, log, volumeID); err != nil { return fmt.Errorf("cleaning up stale requests: %w", err) } @@ -756,6 +756,10 @@ func (m *Manager) ManageVolumeImmediate(ctx context.Context, volumeID string) (m // If issuance fails, immediately return without retrying so the caller can decide // how to proceed depending on the context this method was called within. if err := m.issue(ctx, volumeID); err != nil { + // Increase issue error count + if m.metrics != nil { + m.metrics.IncrementIssueErrorCountTotal(m.nodeNameHash, volumeID) + } return true, err } } @@ -835,6 +839,10 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) { defer issueCancel() if err := m.issue(issueCtx, volumeID); err != nil { log.Error(err, "Failed to issue certificate, retrying after applying exponential backoff") + // Increase issue error count + if m.metrics != nil { + m.metrics.IncrementIssueErrorCountTotal(m.nodeNameHash, volumeID) + } return false, nil } return true, nil diff --git a/manager/manager_test.go b/manager/manager_test.go index aa99d51..c9abea6 100644 --- a/manager/manager_test.go +++ b/manager/manager_test.go @@ -24,13 +24,13 @@ import ( "encoding/pem" "fmt" "math/big" - "sync/atomic" "testing" "time" cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" 
"github.com/go-logr/logr/testr" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -41,6 +41,7 @@ import ( internalapi "github.com/cert-manager/csi-lib/internal/api" internalapiutil "github.com/cert-manager/csi-lib/internal/api/util" "github.com/cert-manager/csi-lib/metadata" + "github.com/cert-manager/csi-lib/metrics" "github.com/cert-manager/csi-lib/storage" testutil "github.com/cert-manager/csi-lib/test/util" ) @@ -309,14 +310,13 @@ func TestManager_ManageVolume_exponentialBackOffRetryOnIssueErrors(t *testing.T) // Expected number of retries in each expBackOff cycle := // ⌈log base expBackOffFactor of (expBackOffCap/expBackOffDuration)⌉ - var expectNumOfRetries int32 = 3 // ⌈log2(500/100)⌉ + var expectNumOfRetries float64 = 3 // ⌈log2(500/100)⌉ // Because in startRenewalRoutine, ticker := time.NewTicker(time.Second) // 2 seconds should complete an expBackOff cycle // ticker start time (1s) + expBackOffCap (0.5s) + expectNumOfRetries (3) * issueRenewalTimeout (0.1) expectGlobalTimeout := 2 * time.Second - var numOfRetries int32 = 0 // init opts := newDefaultTestOptions(t) opts.RenewalBackoffConfig = &wait.Backoff{ Duration: expBackOffDuration, @@ -325,27 +325,36 @@ func TestManager_ManageVolume_exponentialBackOffRetryOnIssueErrors(t *testing.T) Jitter: expBackOffJitter, Steps: expBackOffSteps, } + + // Create the manager first to get access to the lister m, err := NewManager(opts) if err != nil { t.Fatal(err) } m.issueRenewalTimeout = issueRenewalTimeout - // Increment the 'numOfRetries' counter whenever issue() is called. - // TODO: replace usages of this function with reading from metrics. 
- m.doNotUse_CallOnEachIssue = func() { - atomic.AddInt32(&numOfRetries, 1) // run in a goroutine, thus increment it atomically - } - // Register a new volume with the metadata store + // Create metrics for the manager using the manager's lister + log := testr.New(t) + registry := prometheus.NewRegistry() store := opts.MetadataReader.(storage.Interface) + metricsHandler := metrics.New(opts.NodeID, &log, registry, store, m.lister) + + // Update the manager's metrics + m.metrics = metricsHandler + + // Register a new volume with the metadata store meta := metadata.Metadata{ VolumeID: "vol-id", TargetPath: "/fake/path", } - store.RegisterMetadata(meta) + if _, err := store.RegisterMetadata(meta); err != nil { + t.Fatal(err) + } // Ensure we stop managing the volume after the test defer func() { - store.RemoveVolume(meta.VolumeID) + if err := store.RemoveVolume(meta.VolumeID); err != nil { + t.Logf("failed to remove volume: %v", err) + } m.UnmanageVolume(meta.VolumeID) }() @@ -357,9 +366,26 @@ func TestManager_ManageVolume_exponentialBackOffRetryOnIssueErrors(t *testing.T) time.Sleep(expectGlobalTimeout) - actualNumOfRetries := atomic.LoadInt32(&numOfRetries) // read atomically + // Read the metric value from the registry + // Gather all metrics and find the certmanager_csi_issue_requests_total metric + metricFamilies, err := registry.Gather() + if err != nil { + t.Fatalf("failed to gather metrics: %v", err) + } + + var actualNumOfRetries float64 + for _, mf := range metricFamilies { + if mf.GetName() == "certmanager_csi_issue_requests_total" { + // Get the first metric (there should only be one with our labels) + if len(mf.GetMetric()) > 0 { + actualNumOfRetries = mf.GetMetric()[0].GetCounter().GetValue() + } + break + } + } + if actualNumOfRetries != expectNumOfRetries { - t.Errorf("expect %d retires, but got %d", expectNumOfRetries, actualNumOfRetries) + t.Errorf("expect %g retries, but got %g", expectNumOfRetries, actualNumOfRetries) } } diff --git 
a/metrics/certificaterequest_collector.go b/metrics/certificaterequest_collector.go
new file mode 100644
index 0000000..b759da6
--- /dev/null
+++ b/metrics/certificaterequest_collector.go
@@ -0,0 +1,274 @@
+/*
+Copyright 2025 The cert-manager Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+import (
+	"crypto/x509"
+	"encoding/pem"
+	"fmt"
+	"time"
+
+	cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1"
+	cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1"
+	cmlisters "github.com/cert-manager/cert-manager/pkg/client/listers/certmanager/v1"
+	"github.com/prometheus/client_golang/prometheus"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/selection"
+
+	internalapi "github.com/cert-manager/csi-lib/internal/api"
+	internalapiutil "github.com/cert-manager/csi-lib/internal/api/util"
+	"github.com/cert-manager/csi-lib/storage"
+)
+
+// Descriptors for every metric emitted by CertificateRequestCollector, plus the
+// set of Ready condition statuses that the ready-status metric enumerates.
+var (
+	certRequestReadyConditionStatuses     = [...]cmmeta.ConditionStatus{cmmeta.ConditionTrue, cmmeta.ConditionFalse, cmmeta.ConditionUnknown}
+	certRequestReadyStatusMetric          = prometheus.NewDesc("certmanager_csi_certificate_request_ready_status", "The ready status of the certificate request.", []string{"name", "namespace", "condition", "issuer_name", "issuer_kind", "issuer_group"}, nil)
+	certRequestExpirationTimestampSeconds = prometheus.NewDesc("certmanager_csi_certificate_request_expiration_timestamp_seconds", "The timestamp after which the certificate request expires, expressed in Unix Epoch Time.", []string{"name", "namespace", "issuer_name", "issuer_kind", "issuer_group"}, nil)
+	certRequestRenewalTimestampSeconds    = prometheus.NewDesc("certmanager_csi_certificate_request_renewal_timestamp_seconds", "The timestamp after which the certificate request should be renewed, expressed in Unix Epoch Time.", []string{"name", "namespace", "issuer_name", "issuer_kind", "issuer_group"}, nil)
+	managedVolumeCount                    = prometheus.NewDesc("certmanager_csi_managed_volume_count", "The number of managed volumes by the csi driver.", []string{"node"}, nil)
+	managedCertRequestCount               = prometheus.NewDesc("certmanager_csi_managed_certificate_request_count", "The number of managed certificate requests by the csi driver.", []string{"node"}, nil)
+)
+
+// CertificateRequestCollector is a prometheus.Collector that emits metrics about
+// the CertificateRequests labelled with a single node hash and about the volumes
+// listed by the metadata reader. All metrics are rebuilt on every Collect call.
+type CertificateRequestCollector struct {
+	nodeNameHash                                 string
+	metadataReader                               storage.MetadataReader
+	certificateRequestLister                     cmlisters.CertificateRequestLister
+	certificateRequestReadyStatusMetric          *prometheus.Desc
+	certificateRequestExpirationTimestampSeconds *prometheus.Desc
+	certificateRequestRenewalTimestampSeconds    *prometheus.Desc
+	managedVolumeCount                           *prometheus.Desc
+	managedCertificateRequestCount               *prometheus.Desc
+}
+
+// NewCertificateRequestCollector returns a collector for the given node hash that
+// reads volumes from metadataReader and CertificateRequests from
+// certificateRequestLister. It uses the package-level metric descriptors.
+func NewCertificateRequestCollector(nodeNameHash string, metadataReader storage.MetadataReader, certificateRequestLister cmlisters.CertificateRequestLister) prometheus.Collector {
+	return &CertificateRequestCollector{
+		nodeNameHash:                                 nodeNameHash,
+		metadataReader:                               metadataReader,
+		certificateRequestLister:                     certificateRequestLister,
+		certificateRequestReadyStatusMetric:          certRequestReadyStatusMetric,
+		certificateRequestExpirationTimestampSeconds: certRequestExpirationTimestampSeconds,
+		certificateRequestRenewalTimestampSeconds:    certRequestRenewalTimestampSeconds,
+		managedVolumeCount:                           managedVolumeCount,
+		managedCertificateRequestCount:               managedCertRequestCount,
+	}
+}
+
+// Describe sends the descriptors of all metrics this collector can emit to ch.
+func (cc *CertificateRequestCollector) Describe(ch chan<- *prometheus.Desc) {
+	ch <- cc.certificateRequestReadyStatusMetric
+	ch <- cc.certificateRequestExpirationTimestampSeconds
+	ch <- cc.certificateRequestRenewalTimestampSeconds
+	ch <- cc.managedVolumeCount
+	ch <- cc.managedCertificateRequestCount
+}
+
+// Collect emits the current metrics onto ch. If the label requirement cannot be
+// built or the lister fails, nothing is emitted. If reading volume metadata
+// fails, only the managed certificate request count is emitted. Errors are
+// dropped silently; they are neither logged nor reported to the caller.
+func (cc *CertificateRequestCollector) Collect(ch chan<- prometheus.Metric) {
+	// Get the certificate requests from the lister, filtered by node selector
+	nodeSelector := labels.NewSelector()
+	req, err := labels.NewRequirement(internalapi.NodeIDHashLabelKey, selection.Equals, []string{cc.nodeNameHash})
+	if err != nil {
+		return
+	}
+	nodeSelector = nodeSelector.Add(*req)
+	certRequestList, err := cc.certificateRequestLister.List(nodeSelector)
+	if err != nil {
+		return
+	}
+	cc.updateManagedCertificateRequestCount(len(certRequestList), ch)
+
+	// Get the next issuance time map from the metadata reader
+	nextIssuanceTimeMap, err := cc.getNextIssuanceTimeMapFromMetadata()
+	if err != nil {
+		return
+	}
+	cc.updateManagedVolumeCount(len(nextIssuanceTimeMap), ch) // NOTE(review): only volumes with a NextIssuanceTime set have an entry, so others are not counted
+
+	for _, cr := range certRequestList {
+		cc.updateCertificateRequestReadyStatus(cr, ch)
+		cc.updateCertificateRequestExpiry(cr, ch)
+		cc.updateCertificateRequestRenewalTime(cr, nextIssuanceTimeMap, ch)
+	}
+}
+
+// updateCertificateRequestReadyStatus emits one gauge per possible Ready condition
+// status (True, False, Unknown) for cr: 1 for the status matching cr's Ready
+// condition and 0 for the others. A request without a Ready condition is
+// reported as Unknown.
+func (cc *CertificateRequestCollector) updateCertificateRequestReadyStatus(cr *cmapi.CertificateRequest, ch chan<- prometheus.Metric) {
+	setMetric := func(cr *cmapi.CertificateRequest, ch chan<- prometheus.Metric, status cmmeta.ConditionStatus) {
+		for _, condition := range certRequestReadyConditionStatuses {
+			value := 0.0
+
+			if status == condition {
+				value = 1.0
+			}
+
+			metric := prometheus.MustNewConstMetric(
+				cc.certificateRequestReadyStatusMetric, prometheus.GaugeValue,
+				value,
+				cr.Name,
+				cr.Namespace,
+				string(condition),
+				cr.Spec.IssuerRef.Name,
+				cr.Spec.IssuerRef.Kind,
+				cr.Spec.IssuerRef.Group,
+			)
+
+			ch <- metric
+		}
+	}
+
+	for _, st := range cr.Status.Conditions {
+		if st.Type == cmapi.CertificateRequestConditionReady {
+			setMetric(cr, ch, st.Status)
+			return
+		}
+	}
+
+	setMetric(cr, ch, cmmeta.ConditionUnknown)
+}
+
+// updateCertificateRequestExpiry emits the NotAfter time of cr's issued
+// certificate as a Unix timestamp, or 0 when cr.Status.Certificate is nil.
+// If the certificate cannot be parsed, no expiry metric is emitted for cr.
+func (cc *CertificateRequestCollector) updateCertificateRequestExpiry(cr *cmapi.CertificateRequest, ch chan<- prometheus.Metric) {
+	expiryTime := 0.0
+
+	if cr.Status.Certificate != nil {
+		notAfter, err := getCertNotAfterTime(cr.Status.Certificate)
+		if err != nil {
+			return
+		}
+		expiryTime = float64(notAfter.Unix())
+	}
+
+	metric := prometheus.MustNewConstMetric(
+		cc.certificateRequestExpirationTimestampSeconds,
+		prometheus.GaugeValue,
+		expiryTime,
+		cr.Name,
+		cr.Namespace,
+		cr.Spec.IssuerRef.Name,
+		cr.Spec.IssuerRef.Kind,
+		cr.Spec.IssuerRef.Group,
+	)
+
+	ch <- metric
+}
+
+// updateCertificateRequestRenewalTime updates the renewal time metric for the given certificate request.
+// The renewal time is the time at which the volume should be renewed.
+// Note: there might be multiple certificate requests for a volume depending on the MaxRequestsPerVolume value,
+// but only the latest one will be stored in the nextIssuanceTimeMap.
+// A value of 0 is emitted when cr has no labels or its volume ID hash label is not in nextIssuanceTimeMap.
+func (cc *CertificateRequestCollector) updateCertificateRequestRenewalTime(cr *cmapi.CertificateRequest, nextIssuanceTimeMap map[string]time.Time, ch chan<- prometheus.Metric) {
+	renewalTime := 0.0
+
+	if len(cr.Labels) != 0 {
+		if nextIssuanceTime, ok := nextIssuanceTimeMap[cr.Labels[internalapi.VolumeIDHashLabelKey]]; ok {
+			renewalTime = float64(nextIssuanceTime.Unix())
+		}
+	}
+
+	metric := prometheus.MustNewConstMetric(
+		cc.certificateRequestRenewalTimestampSeconds,
+		prometheus.GaugeValue,
+		renewalTime,
+		cr.Name,
+		cr.Namespace,
+		cr.Spec.IssuerRef.Name,
+		cr.Spec.IssuerRef.Kind,
+		cr.Spec.IssuerRef.Group,
+	)
+
+	ch <- metric
+}
+
+// getCertNotAfterTime returns the NotAfter time of the issued certificate.
+// It expects the certificate to be encoded in PEM format.
+// Only the first PEM block in certBytes is parsed.
+func getCertNotAfterTime(certBytes []byte) (time.Time, error) {
+	block, _ := pem.Decode(certBytes)
+	if block == nil {
+		return time.Time{}, fmt.Errorf("invalid PEM data: could not decode certificate")
+	}
+	crt, err := x509.ParseCertificate(block.Bytes)
+	if err != nil {
+		return time.Time{}, fmt.Errorf("parsing issued certificate: %w", err)
+	}
+
+	return crt.NotAfter, nil
+}
+
+// getNextIssuanceTimeMapFromMetadata returns a map of volume ID hashes to the next issuance time.
+// The map is keyed by the volume ID hash.
+// The next issuance time is the time at which the volume should be renewed.
+// Volumes without a NextIssuanceTime are omitted; a failure to read any volume's metadata aborts the whole call.
+func (cc *CertificateRequestCollector) getNextIssuanceTimeMapFromMetadata() (map[string]time.Time, error) {
+	volumeIDs, err := cc.metadataReader.ListVolumes()
+	if err != nil {
+		return nil, fmt.Errorf("listing volumes: %w", err)
+	}
+
+	nextIssuanceTimeMap := make(map[string]time.Time, len(volumeIDs))
+	for _, id := range volumeIDs {
+		volumeMetadata, err := cc.metadataReader.ReadMetadata(id)
+		if err != nil {
+			return nil, err
+		}
+		if volumeMetadata.NextIssuanceTime != nil {
+			nextIssuanceTimeMap[internalapiutil.HashIdentifier(id)] = *volumeMetadata.NextIssuanceTime
+		}
+	}
+	return nextIssuanceTimeMap, nil
+}
+
+// updateManagedVolumeCount emits count as the managed volume gauge, with the node hash as the "node" label.
+func (cc *CertificateRequestCollector) updateManagedVolumeCount(count int, ch chan<- prometheus.Metric) {
+	metric := prometheus.MustNewConstMetric(
+		cc.managedVolumeCount,
+		prometheus.GaugeValue,
+		float64(count),
+		cc.nodeNameHash,
+	)
+
+	ch <- metric
+}
+
+// updateManagedCertificateRequestCount emits count as the managed certificate request gauge, with the node hash as the "node" label.
+func (cc *CertificateRequestCollector) updateManagedCertificateRequestCount(count int, ch chan<- prometheus.Metric) {
+	metric := prometheus.MustNewConstMetric(
+		cc.managedCertificateRequestCount,
+		prometheus.GaugeValue,
+		float64(count),
+		cc.nodeNameHash,
+	)
+
+	ch <- metric
+}
diff --git a/metrics/certificaterequest_test.go b/metrics/certificaterequest_test.go new file mode 100644 index 0000000..0589a7b --- /dev/null +++ b/metrics/certificaterequest_test.go @@ -0,0 +1,450 @@ +/* +Copyright
2025 The cert-manager Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "strings" + "testing" + "time" + + cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" + cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" + "github.com/cert-manager/cert-manager/pkg/client/clientset/versioned/fake" + "github.com/cert-manager/cert-manager/pkg/client/informers/externalversions" + testcrypto "github.com/cert-manager/cert-manager/test/unit/crypto" + "github.com/cert-manager/cert-manager/test/unit/gen" + "github.com/go-logr/logr/testr" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + internalapi "github.com/cert-manager/csi-lib/internal/api" + internalapiutil "github.com/cert-manager/csi-lib/internal/api/util" + "github.com/cert-manager/csi-lib/metadata" + "github.com/cert-manager/csi-lib/storage" +) + +const expiryMetadata = ` + # HELP certmanager_csi_certificate_request_expiration_timestamp_seconds The timestamp after which the certificate request expires, expressed in Unix Epoch Time. + # TYPE certmanager_csi_certificate_request_expiration_timestamp_seconds gauge +` + +const renewalTimeMetadata = ` + # HELP certmanager_csi_certificate_request_renewal_timestamp_seconds The timestamp after which the certificate request should be renewed, expressed in Unix Epoch Time. 
+ # TYPE certmanager_csi_certificate_request_renewal_timestamp_seconds gauge +` + +const readyMetadata = ` + # HELP certmanager_csi_certificate_request_ready_status The ready status of the certificate request. + # TYPE certmanager_csi_certificate_request_ready_status gauge +` + +func TestCertificateRequestMetrics(t *testing.T) { + testNodeName := "test-node-name" + testVolumeID := "test-vol-id" + + // private key to be used to generate X509 certificate + privKey := testcrypto.MustCreatePEMPrivateKey(t) + certTemplate := &cmapi.Certificate{ + ObjectMeta: metav1.ObjectMeta{Namespace: "test-ns", Name: "test-cert"}, + Spec: cmapi.CertificateSpec{ + CommonName: "test.example.com", + }, + } + notBefore := time.Unix(0, 0) + notAfter := time.Unix(100, 0) + testCert := testcrypto.MustCreateCertWithNotBeforeAfter(t, privKey, certTemplate, notBefore, notAfter) + renew := time.Unix(50, 0) + + type testT struct { + cr *cmapi.CertificateRequest + meta metadata.Metadata + expectedExpiry, expectedReady, expectedRenewalTime string + } + tests := map[string]testT{ + "certificate with expiry and ready status": { + cr: gen.CertificateRequest("test-certificate-request", + gen.SetCertificateRequestNamespace("test-ns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionTrue, + }), + gen.SetCertificateRequestCertificate(testCert), + ), + + expectedExpiry: ` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 100 +`, + expectedReady: ` + 
certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 1 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + expectedRenewalTime: ` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + }, + "certificate with no expiry and no status should give an expiry of 0 and Unknown status": { + cr: gen.CertificateRequest("test-certificate-request", + gen.SetCertificateRequestNamespace("test-ns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + ), + + expectedExpiry: ` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + expectedReady: ` + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + 
certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 1 +`, + expectedRenewalTime: ` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + }, + "certificate with expiry and status False should give an expiry and False status": { + cr: gen.CertificateRequest("test-certificate-request", + gen.SetCertificateRequestNamespace("test-ns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionFalse, + }), + gen.SetCertificateRequestCertificate(testCert), + ), + + expectedExpiry: ` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 100 +`, + expectedReady: ` + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 1 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + expectedRenewalTime: ` + 
certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + }, + "certificate with expiry and status Unknown should give an expiry and Unknown status": { + cr: gen.CertificateRequest("test-certificate-request", + gen.SetCertificateRequestNamespace("test-ns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionUnknown, + }), + gen.SetCertificateRequestCertificate(testCert), + ), + + expectedExpiry: ` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 100 +`, + expectedReady: ` + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 1 +`, + expectedRenewalTime: ` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + }, + "certificate with expiry and ready status and renew before": { + cr: 
gen.CertificateRequest("test-certificate-request", + gen.SetCertificateRequestNamespace("test-ns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionTrue, + }), + gen.SetCertificateRequestCertificate(testCert), + ), + meta: metadata.Metadata{ + VolumeID: testVolumeID, + NextIssuanceTime: &renew, + }, + + expectedExpiry: ` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 100 +`, + expectedReady: ` + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 1 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + expectedRenewalTime: ` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 50 +`, + }, + } + for n, test := range tests { + t.Run(n, func(t *testing.T) { + testLog := testr.New(t) + + fakeClient := fake.NewSimpleClientset() + factory := externalversions.NewSharedInformerFactory(fakeClient, 0) + certRequestInformer := factory.Certmanager().V1().CertificateRequests() + test.cr.Labels = 
map[string]string{ + internalapi.NodeIDHashLabelKey: internalapiutil.HashIdentifier(testNodeName), + internalapi.VolumeIDHashLabelKey: internalapiutil.HashIdentifier(testVolumeID), + } + err := certRequestInformer.Informer().GetIndexer().Add(test.cr) + assert.NoError(t, err) + fakeMetadata := storage.NewMemoryFS() + fakeMetadata.RegisterMetadata(test.meta) + m := New(testNodeName, &testLog, prometheus.NewRegistry(), fakeMetadata, certRequestInformer.Lister()) + + if err := testutil.CollectAndCompare(m.certificateRequestCollector, + strings.NewReader(expiryMetadata+test.expectedExpiry), + "certmanager_csi_certificate_request_expiration_timestamp_seconds", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + if err := testutil.CollectAndCompare(m.certificateRequestCollector, + strings.NewReader(renewalTimeMetadata+test.expectedRenewalTime), + "certmanager_csi_certificate_request_renewal_timestamp_seconds", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + if err := testutil.CollectAndCompare(m.certificateRequestCollector, + strings.NewReader(readyMetadata+test.expectedReady), + "certmanager_csi_certificate_request_ready_status", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + err = certRequestInformer.Informer().GetIndexer().Delete(test.cr) + assert.NoError(t, err) + }) + } +} + +func TestCertificateRequestCache(t *testing.T) { + testNodeName := "test-node-name" + testNodeNameHash := internalapiutil.HashIdentifier(testNodeName) + + // private key to be used to generate X509 certificate + privKey := testcrypto.MustCreatePEMPrivateKey(t) + certTemplate := &cmapi.Certificate{ + ObjectMeta: metav1.ObjectMeta{Namespace: "testns", Name: "test"}, + Spec: cmapi.CertificateSpec{ + CommonName: "test.example.com", + }, + } + notBefore := time.Unix(0, 0) + notAfter1, notAfter2, notAfter3 := + time.Unix(100, 0), time.Unix(200, 0), time.Unix(300, 0) + renew1, renew2, renew3 := + time.Unix(50, 0), 
time.Unix(150, 0), time.Unix(250, 0) + + cr1 := gen.CertificateRequest("cr1", + gen.SetCertificateRequestNamespace("testns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionUnknown, + }), + gen.SetCertificateRequestCertificate( + testcrypto.MustCreateCertWithNotBeforeAfter(t, privKey, certTemplate, notBefore, notAfter1)), + ) + cr2 := gen.CertificateRequest("cr2", + gen.SetCertificateRequestNamespace("testns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionTrue, + }), + gen.SetCertificateRequestCertificate( + testcrypto.MustCreateCertWithNotBeforeAfter(t, privKey, certTemplate, notBefore, notAfter2)), + ) + cr3 := gen.CertificateRequest("cr3", + gen.SetCertificateRequestNamespace("testns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionFalse, + }), + gen.SetCertificateRequestCertificate( + testcrypto.MustCreateCertWithNotBeforeAfter(t, privKey, certTemplate, notBefore, notAfter3)), + ) + + cr1.Labels = map[string]string{ + internalapi.NodeIDHashLabelKey: testNodeNameHash, + internalapi.VolumeIDHashLabelKey: internalapiutil.HashIdentifier("vol-1"), + } + cr2.Labels = map[string]string{ + internalapi.NodeIDHashLabelKey: testNodeNameHash, + internalapi.VolumeIDHashLabelKey: internalapiutil.HashIdentifier("vol-2"), + } + cr3.Labels 
= map[string]string{ + internalapi.NodeIDHashLabelKey: testNodeNameHash, + internalapi.VolumeIDHashLabelKey: internalapiutil.HashIdentifier("vol-3"), + } + + fakeMetadata := storage.NewMemoryFS() + fakeMetadata.RegisterMetadata(metadata.Metadata{ + VolumeID: "vol-1", NextIssuanceTime: &renew1, + }) + fakeMetadata.RegisterMetadata(metadata.Metadata{ + VolumeID: "vol-2", NextIssuanceTime: &renew2, + }) + fakeMetadata.RegisterMetadata(metadata.Metadata{ + VolumeID: "vol-3", NextIssuanceTime: &renew3, + }) + + fakeClient := fake.NewSimpleClientset() + factory := externalversions.NewSharedInformerFactory(fakeClient, 0) + certRequestInformer := factory.Certmanager().V1().CertificateRequests() + + err := certRequestInformer.Informer().GetIndexer().Add(cr1) + assert.NoError(t, err) + err = certRequestInformer.Informer().GetIndexer().Add(cr2) + assert.NoError(t, err) + err = certRequestInformer.Informer().GetIndexer().Add(cr3) + assert.NoError(t, err) + + testLog := testr.New(t) + m := New(testNodeName, &testLog, prometheus.NewRegistry(), fakeMetadata, certRequestInformer.Lister()) + + // Check all three metrics exist + if err := testutil.CollectAndCompare(m.certificateRequestCollector, + strings.NewReader(readyMetadata+` + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 1 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 0 
+ certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 1 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 1 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 0 +`), + "certmanager_csi_certificate_request_ready_status", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + if err := testutil.CollectAndCompare(m.certificateRequestCollector, + strings.NewReader(expiryMetadata+` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 100 + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 200 + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 300 +`), + "certmanager_csi_certificate_request_expiration_timestamp_seconds", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + if err := 
testutil.CollectAndCompare(m.certificateRequestCollector, + strings.NewReader(renewalTimeMetadata+` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 50 + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 150 + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 250 +`), + "certmanager_csi_certificate_request_renewal_timestamp_seconds", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + // Remove second certificate and check not exists + err = certRequestInformer.Informer().GetIndexer().Delete(cr2) + assert.NoError(t, err) + + if err := testutil.CollectAndCompare(m.certificateRequestCollector, + strings.NewReader(readyMetadata+` + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 1 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 0 + 
certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 1 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 0 +`), + "certmanager_csi_certificate_request_ready_status", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + if err := testutil.CollectAndCompare(m.certificateRequestCollector, + strings.NewReader(expiryMetadata+` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 100 + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 300 +`), + "certmanager_csi_certificate_request_expiration_timestamp_seconds", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + if err := testutil.CollectAndCompare(m.certificateRequestCollector, + strings.NewReader(renewalTimeMetadata+` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 50 + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 250 +`), + "certmanager_csi_certificate_request_renewal_timestamp_seconds", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + // Remove all Certificates (even is already removed) and observe no Certificates + err = certRequestInformer.Informer().GetIndexer().Delete(cr1) + assert.NoError(t, err) + err = 
certRequestInformer.Informer().GetIndexer().Delete(cr2) + assert.NoError(t, err) + err = certRequestInformer.Informer().GetIndexer().Delete(cr3) + assert.NoError(t, err) + + if testutil.CollectAndCount(m.certificateRequestCollector, "certmanager_csi_certificate_request_ready_status") != 0 { + t.Errorf("unexpected collecting result") + } + if testutil.CollectAndCount(m.certificateRequestCollector, "certmanager_csi_certificate_request_expiration_timestamp_seconds") != 0 { + t.Errorf("unexpected collecting result") + } + if testutil.CollectAndCount(m.certificateRequestCollector, "certmanager_csi_certificate_request_renewal_timestamp_seconds") != 0 { + t.Errorf("unexpected collecting result") + } +} diff --git a/metrics/metrics.go b/metrics/metrics.go new file mode 100644 index 0000000..8e38bf5 --- /dev/null +++ b/metrics/metrics.go @@ -0,0 +1,117 @@ +/* +Copyright 2025 The cert-manager Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +import ( + "net/http" + + cmlisters "github.com/cert-manager/cert-manager/pkg/client/listers/certmanager/v1" + "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + + internalapiutil "github.com/cert-manager/csi-lib/internal/api/util" + "github.com/cert-manager/csi-lib/storage" +) + +const ( + // Namespace is the namespace for csi-lib metric names + namespace = "certmanager" + subsystem = "csi" +) + +// Metrics is designed to be a shared object for updating the metrics exposed by csi-lib +type Metrics struct { + log logr.Logger + registry *prometheus.Registry + + issueRequestsTotal *prometheus.CounterVec + issueErrorsTotal *prometheus.CounterVec + certificateRequestCollector prometheus.Collector +} + +// New creates a Metrics struct and populates it with prometheus metric types. +func New( + nodeId string, + logger *logr.Logger, + registry *prometheus.Registry, + metadataReader storage.MetadataReader, + certificateRequestLister cmlisters.CertificateRequestLister, +) *Metrics { + // issueRequestsTotal is a Prometheus counter for the number of issue() calls made by the driver. + issueRequestsTotal := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "issue_requests_total", + Help: "The number of issue() calls made by the driver.", + }, + []string{"node", "volume"}, + ) + + // issueErrorsTotal is a Prometheus counter for the number of errors encountered + // during the driver issue() calls. 
+ issueErrorsTotal := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "issue_errors_total", + Help: "The number of errors encountered during the driver issue() calls.", + }, + []string{"node", "volume"}, + ) + + // Create server and register Prometheus metrics handler + m := &Metrics{ + log: logger.WithName("metrics"), + registry: registry, + + issueRequestsTotal: issueRequestsTotal, + issueErrorsTotal: issueErrorsTotal, + certificateRequestCollector: NewCertificateRequestCollector( + internalapiutil.HashIdentifier(nodeId), + metadataReader, + certificateRequestLister, + ), + } + + m.registry.MustRegister( + issueRequestsTotal, + issueErrorsTotal, + m.certificateRequestCollector, + ) + + return m +} + +// DefaultHandler returns a default prometheus metrics HTTP handler +func (m *Metrics) DefaultHandler() http.Handler { + mux := http.NewServeMux() + mux.Handle("/metrics", promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})) + + return mux +} + +// IncrementIssueCallCountTotal will increase the issue call counter for the driver. +func (m *Metrics) IncrementIssueCallCountTotal(nodeNameHash, volumeID string) { + m.issueRequestsTotal.WithLabelValues(nodeNameHash, volumeID).Inc() +} + +// IncrementIssueErrorCountTotal will increase count of errors during issue call of the driver. 
+func (m *Metrics) IncrementIssueErrorCountTotal(nodeNameHash, volumeID string) { + m.issueErrorsTotal.WithLabelValues(nodeNameHash, volumeID).Inc() +} diff --git a/test/driver/driver_testing.go b/test/driver/driver_testing.go index 165a204..ddc1fc3 100644 --- a/test/driver/driver_testing.go +++ b/test/driver/driver_testing.go @@ -36,6 +36,7 @@ import ( "github.com/cert-manager/csi-lib/driver" "github.com/cert-manager/csi-lib/manager" "github.com/cert-manager/csi-lib/metadata" + "github.com/cert-manager/csi-lib/metrics" "github.com/cert-manager/csi-lib/storage" ) @@ -45,6 +46,7 @@ type Options struct { Log *logr.Logger Client cmclient.Interface Mounter mount.Interface + Metrics *metrics.Metrics NodeID string MaxRequestsPerVolume int @@ -110,6 +112,7 @@ func Run(t *testing.T, opts Options) (Options, csi.NodeClient, func()) { Clock: opts.Clock, Log: opts.Log, NodeID: opts.NodeID, + Metrics: opts.Metrics, MaxRequestsPerVolume: opts.MaxRequestsPerVolume, GeneratePrivateKey: opts.GeneratePrivateKey, GenerateRequest: opts.GenerateRequest, diff --git a/test/integration/metrics_test.go b/test/integration/metrics_test.go new file mode 100644 index 0000000..4a16e77 --- /dev/null +++ b/test/integration/metrics_test.go @@ -0,0 +1,284 @@ +/* +Copyright 2025 The cert-manager Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package integration + +import ( + "context" + "crypto" + "crypto/x509" + "fmt" + "io" + "net" + "net/http" + "os" + "strings" + "testing" + "time" + + cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" + cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" + "github.com/cert-manager/cert-manager/pkg/client/clientset/versioned/fake" + "github.com/cert-manager/cert-manager/pkg/client/informers/externalversions" + testcrypto "github.com/cert-manager/cert-manager/test/unit/crypto" + "github.com/container-storage-interface/spec/lib/go/csi" + "github.com/go-logr/logr/testr" + "github.com/prometheus/client_golang/prometheus" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + fakeclock "k8s.io/utils/clock/testing" + + "github.com/cert-manager/csi-lib/manager" + "github.com/cert-manager/csi-lib/metadata" + "github.com/cert-manager/csi-lib/metrics" + "github.com/cert-manager/csi-lib/storage" + testdriver "github.com/cert-manager/csi-lib/test/driver" + testutil "github.com/cert-manager/csi-lib/test/util" +) + +var ( + testMetrics = func(ctx context.Context, metricsEndpoint, expectedOutput string) error { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, metricsEndpoint, nil) + if err != nil { + return err + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + output, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + trimmedOutput := strings.SplitN(string(output), "# HELP go_gc_duration_seconds", 2)[0] + if strings.TrimSpace(trimmedOutput) != strings.TrimSpace(expectedOutput) { + return fmt.Errorf("got unexpected metrics output\nexp:\n%s\ngot:\n%s\n", + expectedOutput, trimmedOutput) + } + + return nil + } + + waitForMetrics = func(t *testing.T, ctx context.Context, metricsEndpoint, expectedOutput string) { + var lastErr error + err := wait.PollUntilContextCancel(ctx, time.Millisecond*100, true, func(ctx context.Context) 
(done bool, err error) { + if err := testMetrics(ctx, metricsEndpoint, expectedOutput); err != nil { + lastErr = err + return false, nil + } + + return true, nil + }) + if err != nil { + t.Fatalf("%s: failed to wait for expected metrics to be exposed: %s", err, lastErr) + } + } +) + +func TestMetricsServer(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + + testLog := testr.New(t) + testNamespace := "test-ns" + testNodeId := "test-node" + + // Build metrics handler, and start metrics server with a random available port + store := storage.NewMemoryFS() + fakeClient := fake.NewSimpleClientset() + // client-go imposes a minimum resync period of 1 second, so that is the lowest we can go + // https://github.com/kubernetes/client-go/blob/5a019202120ab4dd7dfb3788e5cb87269f343ebe/tools/cache/shared_informer.go#L575 + factory := externalversions.NewSharedInformerFactory(fakeClient, time.Second) + certRequestInformer := factory.Certmanager().V1().CertificateRequests() + metricsHandler := metrics.New(testNodeId, &testLog, prometheus.NewRegistry(), store, certRequestInformer.Lister()) + factory.Start(ctx.Done()) + factory.WaitForCacheSync(ctx.Done()) + + // listenConfig + listenConfig := &net.ListenConfig{} + metricsLn, err := listenConfig.Listen(ctx, "tcp", "127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + metricsServer := &http.Server{ + Addr: metricsLn.Addr().String(), + ReadTimeout: 8 * time.Second, + WriteTimeout: 8 * time.Second, + MaxHeaderBytes: 1 << 20, // 1 MiB + Handler: metricsHandler.DefaultHandler(), + } + + errCh := make(chan error) + go func() { + defer close(errCh) + testLog.Info("starting metrics server", "address", metricsLn.Addr()) + if err := metricsServer.Serve(metricsLn); err != http.ErrServerClosed { + errCh <- err + } + }() + defer func() { + // allow a timeout for graceful shutdown + shutdownCtx, cancel := context.WithTimeout(ctx, time.Second*5) + defer cancel() + + if err := 
metricsServer.Shutdown(shutdownCtx); err != nil { + t.Fatal(err) + } + err := <-errCh + if err != nil { + t.Fatal(err) + } + }() + + // Build and start the driver + clock := fakeclock.NewFakeClock(time.Now()) + opts, cl, stop := testdriver.Run(t, testdriver.Options{ + Store: store, + Clock: clock, + Metrics: metricsHandler, + Client: fakeClient, + NodeID: testNodeId, + Log: &testLog, + GeneratePrivateKey: func(meta metadata.Metadata) (crypto.PrivateKey, error) { + return nil, nil + }, + GenerateRequest: func(meta metadata.Metadata) (*manager.CertificateRequestBundle, error) { + return &manager.CertificateRequestBundle{ + Namespace: testNamespace, + IssuerRef: cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }, + }, nil + }, + SignRequest: func(meta metadata.Metadata, key crypto.PrivateKey, request *x509.CertificateRequest) (csr []byte, err error) { + return []byte{}, nil + }, + WriteKeypair: func(meta metadata.Metadata, key crypto.PrivateKey, chain []byte, ca []byte) error { + store.WriteFiles(meta, map[string][]byte{ + "ca": ca, + "cert": chain, + }) + nextIssuanceTime := time.Unix(200, 0) + meta.NextIssuanceTime = &nextIssuanceTime + return store.WriteMetadata(meta.VolumeID, meta) + }, + }) + defer stop() + + // Should expose no additional metrics + metricsEndpoint := fmt.Sprintf("http://%s/metrics", metricsServer.Addr) + waitForMetrics(t, ctx, metricsEndpoint, `# HELP certmanager_csi_managed_certificate_request_count The number of managed certificate requests by the csi driver. +# TYPE certmanager_csi_managed_certificate_request_count gauge +certmanager_csi_managed_certificate_request_count{node="f56fd9f8b"} 0 +# HELP certmanager_csi_managed_volume_count The number of managed volumes by the csi driver. 
+# TYPE certmanager_csi_managed_volume_count gauge +certmanager_csi_managed_volume_count{node="f56fd9f8b"} 0 +`) + + // Create a self-signed Certificate and wait for it to be issued + privKey := testcrypto.MustCreatePEMPrivateKey(t) + certTemplate := &cmapi.Certificate{ + ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: "test"}, + Spec: cmapi.CertificateSpec{ + CommonName: "test.example.com", + }, + } + notBefore, notAfter := time.Unix(0, 0), time.Unix(300, 0) // renewal time will be 200 + selfSignedCertBytesWithValidity := testcrypto.MustCreateCertWithNotBeforeAfter(t, privKey, certTemplate, notBefore, notAfter) + go testutil.IssueOneRequest(ctx, t, opts.Client, testNamespace, selfSignedCertBytesWithValidity, []byte("ca bytes")) + + // Spin up a test pod + tmpDir, err := os.MkdirTemp("", "*") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpDir) + _, err = cl.NodePublishVolume(ctx, &csi.NodePublishVolumeRequest{ + VolumeId: "test-vol", + VolumeContext: map[string]string{ + "csi.storage.k8s.io/ephemeral": "true", + "csi.storage.k8s.io/pod.name": "the-pod-name", + "csi.storage.k8s.io/pod.namespace": testNamespace, + }, + TargetPath: tmpDir, + Readonly: true, + }) + if err != nil { + t.Fatal(err) + } + + // Get the CSR name + req, err := testutil.WaitAndGetOneCertificateRequestInNamespace(ctx, opts.Client, testNamespace) + if err != nil { + t.Fatal(err) + } + + // Should expose that CertificateRequest as ready with expiry and renewal time + // node="f56fd9f8b" is the hash value of "test-node" + expectedOutputTemplate := `# HELP certmanager_csi_certificate_request_expiration_timestamp_seconds The timestamp after which the certificate request expires, expressed in Unix Epoch Time. 
+# TYPE certmanager_csi_certificate_request_expiration_timestamp_seconds gauge +certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 300 +# HELP certmanager_csi_certificate_request_ready_status The ready status of the certificate request. +# TYPE certmanager_csi_certificate_request_ready_status gauge +certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 0 +certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 1 +certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 0 +# HELP certmanager_csi_certificate_request_renewal_timestamp_seconds The timestamp after which the certificate request should be renewed, expressed in Unix Epoch Time. +# TYPE certmanager_csi_certificate_request_renewal_timestamp_seconds gauge +certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 200 +# HELP certmanager_csi_issue_requests_total The number of issue() calls made by the driver. +# TYPE certmanager_csi_issue_requests_total counter +certmanager_csi_issue_requests_total{node="f56fd9f8b",volume="test-vol"} 1 +# HELP certmanager_csi_managed_certificate_request_count The number of managed certificate requests by the csi driver. 
+# TYPE certmanager_csi_managed_certificate_request_count gauge +certmanager_csi_managed_certificate_request_count{node="f56fd9f8b"} 1 +# HELP certmanager_csi_managed_volume_count The number of managed volumes by the csi driver. +# TYPE certmanager_csi_managed_volume_count gauge +certmanager_csi_managed_volume_count{node="f56fd9f8b"} 1 +` + waitForMetrics(t, ctx, metricsEndpoint, strings.ReplaceAll(expectedOutputTemplate, "test-cr-name", req.Name)) + + // Delete the test pod + _, err = cl.NodeUnpublishVolume(ctx, &csi.NodeUnpublishVolumeRequest{ + VolumeId: "test-vol", + TargetPath: tmpDir, + }) + if err != nil { + t.Fatal(err) + } + err = opts.Client.CertmanagerV1().CertificateRequests(testNamespace).Delete(ctx, req.Name, metav1.DeleteOptions{}) + if err != nil { + t.Fatal(err) + } + + // Should expose no CertificateRequest and only metrics counters + waitForMetrics(t, ctx, metricsEndpoint, `# HELP certmanager_csi_issue_requests_total The number of issue() calls made by the driver. +# TYPE certmanager_csi_issue_requests_total counter +certmanager_csi_issue_requests_total{node="f56fd9f8b",volume="test-vol"} 1 +# HELP certmanager_csi_managed_certificate_request_count The number of managed certificate requests by the csi driver. +# TYPE certmanager_csi_managed_certificate_request_count gauge +certmanager_csi_managed_certificate_request_count{node="f56fd9f8b"} 0 +# HELP certmanager_csi_managed_volume_count The number of managed volumes by the csi driver. 
+# TYPE certmanager_csi_managed_volume_count gauge +certmanager_csi_managed_volume_count{node="f56fd9f8b"} 0 +`) + +} diff --git a/test/util/testutil.go b/test/util/testutil.go index d405e4a..6d9dd95 100644 --- a/test/util/testutil.go +++ b/test/util/testutil.go @@ -29,7 +29,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" ) -func waitAndGetOneCertificateRequestInNamespace(ctx context.Context, client cmclient.Interface, ns string) (*cmapi.CertificateRequest, error) { +func WaitAndGetOneCertificateRequestInNamespace(ctx context.Context, client cmclient.Interface, ns string) (*cmapi.CertificateRequest, error) { var req *cmapi.CertificateRequest if err := wait.PollUntilContextCancel(ctx, time.Millisecond*50, true, func(ctx context.Context) (done bool, err error) { reqs, err := client.CertmanagerV1().CertificateRequests(ns).List(ctx, metav1.ListOptions{}) @@ -53,7 +53,7 @@ func waitAndGetOneCertificateRequestInNamespace(ctx context.Context, client cmcl func IssueOneRequest(ctx context.Context, t *testing.T, client cmclient.Interface, namespace string, cert, ca []byte) { if err := func() error { - req, err := waitAndGetOneCertificateRequestInNamespace(ctx, client, namespace) + req, err := WaitAndGetOneCertificateRequestInNamespace(ctx, client, namespace) if err != nil { return err } @@ -80,7 +80,7 @@ func IssueOneRequest(ctx context.Context, t *testing.T, client cmclient.Interfac func SetCertificateRequestConditions(ctx context.Context, t *testing.T, client cmclient.Interface, namespace string, conditions ...cmapi.CertificateRequestCondition) { if err := func() error { - req, err := waitAndGetOneCertificateRequestInNamespace(ctx, client, namespace) + req, err := WaitAndGetOneCertificateRequestInNamespace(ctx, client, namespace) if err != nil { return err }