From 1bbaa258d5399d82b821b2b5faa3bf625bff0577 Mon Sep 17 00:00:00 2001 From: Aman Agrawal Date: Tue, 16 Sep 2025 17:38:48 +0000 Subject: [PATCH] add lastPublishedTime metric in PublishResources and relevant test. Restructure objects to interfaces for better testability. --- pkg/driver/dra_hooks.go | 2 ++ pkg/driver/dra_hooks_test.go | 43 +++++++++++++++++++++-- pkg/driver/driver.go | 24 +++++++++++-- pkg/driver/driver_test.go | 67 ++++++++++++++++++++++++++++++++++++ pkg/driver/metrics.go | 8 ++++- 5 files changed, 139 insertions(+), 5 deletions(-) create mode 100644 pkg/driver/driver_test.go diff --git a/pkg/driver/dra_hooks.go b/pkg/driver/dra_hooks.go index c84bccee..1be0171c 100644 --- a/pkg/driver/dra_hooks.go +++ b/pkg/driver/dra_hooks.go @@ -68,6 +68,8 @@ func (np *NetworkDriver) PublishResources(ctx context.Context) { err := np.draPlugin.PublishResources(ctx, resources) if err != nil { klog.Error(err, "unexpected error trying to publish resources") + } else { + lastPublishedTime.SetToCurrentTime() } case <-ctx.Done(): klog.Error(ctx.Err(), "context canceled") diff --git a/pkg/driver/dra_hooks_test.go b/pkg/driver/dra_hooks_test.go index 8d5f9488..ed55424c 100644 --- a/pkg/driver/dra_hooks_test.go +++ b/pkg/driver/dra_hooks_test.go @@ -18,16 +18,17 @@ package driver import ( "context" + "fmt" "strings" "testing" "github.com/google/dranet/pkg/apis" - "github.com/google/dranet/pkg/inventory" "github.com/prometheus/client_golang/prometheus/testutil" resourcev1 "k8s.io/api/resource/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/dynamic-resource-allocation/kubeletplugin" + ) func TestPublishResourcesPrometheusMetrics(t *testing.T) { @@ -142,7 +143,7 @@ func TestPrepareResourceClaimsMetrics(t *testing.T) { draPluginRequestsLatencySeconds.Reset() np := &NetworkDriver{ - netdb: inventory.New(), + netdb: newFakeInventoryDB(), driverName: "test.driver", } @@ -239,3 +240,41 @@ func TestUnprepareResourceClaimsMetrics(t *testing.T) { } }) } + +func TestPublishResourcesMetrics(t *testing.T) { + ctx, cancel := context.WithCancel(t.Context()) + defer cancel() + + fakeDraPlugin := newFakePluginHelper() + fakeNetDB := newFakeInventoryDB() + + np := &NetworkDriver{ + draPlugin: fakeDraPlugin, + netdb: fakeNetDB, + nodeName: "test-node", + } + + go np.PublishResources(ctx) + + t.Run("Success", func(t *testing.T) { + lastPublishedTime.Set(0) + fakeNetDB.resources <- []resourcev1.Device{} + <-fakeDraPlugin.publishCalled + + if testutil.ToFloat64(lastPublishedTime) == 0 { + t.Errorf("lastPublishedTime should have been updated, but it is 0") + } + }) + + t.Run("Failure", func(t *testing.T) { + lastPublishedTime.Set(0) + fakeDraPlugin.publishErr = fmt.Errorf("mock publish error") + fakeNetDB.resources <- []resourcev1.Device{} + <-fakeDraPlugin.publishCalled + + if testutil.ToFloat64(lastPublishedTime) != 0 { + t.Errorf("lastPublishedTime should not have been updated, but it is %f", testutil.ToFloat64(lastPublishedTime)) + } + }) +} + diff --git a/pkg/driver/driver.go b/pkg/driver/driver.go index aa8ca4e6..a8f8bba7 100644 --- a/pkg/driver/driver.go +++ b/pkg/driver/driver.go @@ -30,10 +30,13 @@ import ( "github.com/containerd/nri/pkg/stub" "github.com/vishvananda/netlink" + resourceapi "k8s.io/api/resource/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "k8s.io/dynamic-resource-allocation/kubeletplugin" + "k8s.io/dynamic-resource-allocation/resourceslice" "k8s.io/klog/v2" + registerapi "k8s.io/kubelet/pkg/apis/pluginregistration/v1" ) const ( @@ -46,6 +49,23 @@ const ( maxAttempts = 5 ) +// This interface is our internal contract for the behavior we need from a *kubeletplugin.Helper, created specifically so we can fake it in tests. +type pluginHelper interface { + PublishResources(context.Context, resourceslice.DriverResources) error + Stop() + RegistrationStatus() *registerapi.RegistrationStatus +} + +// This interface is our internal contract for the behavior we need from a *inventory.DB, created specifically so we can fake it in tests. +type inventoryDB interface { + Run(context.Context) error + GetResources(context.Context) <-chan []resourceapi.Device + GetNetInterfaceName(string) (string, error) + AddPodNetNs(podKey string, netNs string) + RemovePodNetNs(podKey string) + GetPodNetNs(podKey string) (netNs string) +} + // WithFilter func WithFilter(filter cel.Program) Option { return func(o *NetworkDriver) { @@ -57,11 +77,11 @@ type NetworkDriver struct { driverName string nodeName string kubeClient kubernetes.Interface - draPlugin *kubeletplugin.Helper + draPlugin pluginHelper nriPlugin stub.Stub // contains the host interfaces - netdb *inventory.DB + netdb inventoryDB celProgram cel.Program // Cache the rdma shared mode state diff --git a/pkg/driver/driver_test.go b/pkg/driver/driver_test.go new file mode 100644 index 00000000..19e801da --- /dev/null +++ b/pkg/driver/driver_test.go @@ -0,0 +1,67 @@ +package driver + +import ( + "context" + resourcev1 "k8s.io/api/resource/v1" + "k8s.io/dynamic-resource-allocation/resourceslice" + registerapi "k8s.io/kubelet/pkg/apis/pluginregistration/v1" +) + +// fakeDraPlugin is a mock implementation of the pluginHelper interface for testing. +type fakePluginHelper struct { + publishErr error + publishCalled chan struct{} + registrationStatus *registerapi.RegistrationStatus +} + +func newFakePluginHelper() *fakePluginHelper { + return &fakePluginHelper{ + publishCalled: make(chan struct{}, 1), + } +} + +func (m *fakePluginHelper) PublishResources(_ context.Context, _ resourceslice.DriverResources) error { + if m.publishCalled != nil { + m.publishCalled <- struct{}{} + } + return m.publishErr +} + +func (m *fakePluginHelper) Stop() {} + +func (m *fakePluginHelper) RegistrationStatus() *registerapi.RegistrationStatus { + return m.registrationStatus +} + +// mockNetDB is a mock implementation of the inventoryDB interface for testing. +type fakeInventoryDB struct { + resources chan []resourcev1.Device + podNetNs map[string]string +} + +func newFakeInventoryDB() *fakeInventoryDB { + return &fakeInventoryDB{ + resources: make(chan []resourcev1.Device, 1), + podNetNs: make(map[string]string), + } +} + +func (m *fakeInventoryDB) Run(_ context.Context) error { return nil } + +func (m *fakeInventoryDB) GetResources(_ context.Context) <-chan []resourcev1.Device { + return m.resources +} + +func (m *fakeInventoryDB) GetNetInterfaceName(_ string) (string, error) { return "", nil } + +func (m *fakeInventoryDB) AddPodNetNs(podKey string, netNs string) { + m.podNetNs[podKey] = netNs +} + +func (m *fakeInventoryDB) RemovePodNetNs(podKey string) { + delete(m.podNetNs, podKey) +} + +func (m *fakeInventoryDB) GetPodNetNs(podKey string) string { + return m.podNetNs[podKey] +} diff --git a/pkg/driver/metrics.go b/pkg/driver/metrics.go index b1f68501..dba77806 100644 --- a/pkg/driver/metrics.go +++ b/pkg/driver/metrics.go @@ -45,6 +45,7 @@ func registerMetrics() { prometheus.MustRegister(nriPluginRequestsTotal) prometheus.MustRegister(nriPluginRequestsLatencySeconds) prometheus.MustRegister(publishedDevicesTotal) + prometheus.MustRegister(lastPublishedTime) }) } @@ -79,5 +80,10 @@ var ( Name: "published_devices_total", Help: "Total number of published devices.", }, []string{"feature"}) + lastPublishedTime = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "dranet", + Subsystem: "driver", + Name: "last_published_time_seconds", + Help: "The timestamp of the last successful resource publication.", + }) ) -