From 77edf3105c20d9b15301ebb96f584d5d2b20b9fa Mon Sep 17 00:00:00 2001
From: Michael Aspinwall
Date: Wed, 25 Jun 2025 16:21:40 +0000
Subject: [PATCH] fix: Status update only adding 1 device per claim

---
 examples/resourceclaimtemplate_double.yaml | 65 ++++++++++++++++++++++
 pkg/driver/nri_hooks.go                    | 22 ++++++--
 tests/e2e.bats                             | 29 ++++++++++
 3 files changed, 110 insertions(+), 6 deletions(-)
 create mode 100644 examples/resourceclaimtemplate_double.yaml

diff --git a/examples/resourceclaimtemplate_double.yaml b/examples/resourceclaimtemplate_double.yaml
new file mode 100644
index 00000000..d507b400
--- /dev/null
+++ b/examples/resourceclaimtemplate_double.yaml
@@ -0,0 +1,65 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+apiVersion: resource.k8s.io/v1beta1
+kind: DeviceClass
+metadata:
+  name: multinic
+spec:
+  selectors:
+    - cel:
+        expression: device.driver == "dra.net"
+---
+apiVersion: resource.k8s.io/v1beta1
+kind: ResourceClaimTemplate
+metadata:
+  name: phy-interfaces-template
+spec:
+  spec:
+    devices:
+      requests:
+      - name: phy-interfaces-template
+        count: 2
+        deviceClassName: multinic
+        selectors:
+          - cel:
+              expression: device.attributes["dra.net"].ifName == "dummy6" || device.attributes["dra.net"].ifName == "dummy7"
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: server-deployment
+  labels:
+    app: MyApp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: MyApp
+  template:
+    metadata:
+      labels:
+        app: MyApp
+    spec:
+      resourceClaims:
+      - name: phy-interfaces
+        resourceClaimTemplateName: phy-interfaces-template
+      containers:
+      - name: agnhost
+        image: registry.k8s.io/e2e-test-images/agnhost:2.54
+        args:
+          - netexec
+          - --http-port=80
+        ports:
+        - containerPort: 80
diff --git a/pkg/driver/nri_hooks.go b/pkg/driver/nri_hooks.go
index 2cc32073..16631005 100644
--- a/pkg/driver/nri_hooks.go
+++ b/pkg/driver/nri_hooks.go
@@ -108,10 +108,17 @@ func (np *NetworkDriver) RunPodSandbox(ctx context.Context, pod *api.PodSandbox)
 	// store the Pod metadata in the db
 	np.netdb.AddPodNetns(podKey(pod), ns)
 
+	// Track all the status updates needed for the resource claims of the pod.
+	statusUpdates := map[types.NamespacedName]*resourceapply.ResourceClaimStatusApplyConfiguration{}
 	// Process the configurations of the ResourceClaim
 	for deviceName, config := range podConfig {
 		klog.V(4).Infof("RunPodSandbox processing device: %s with config: %#v", deviceName, config)
-		resourceClaimStatus := resourceapply.ResourceClaimStatus()
+		resourceClaim := types.NamespacedName{Name: config.Claim.Name, Namespace: config.Claim.Namespace}
+		resourceClaimStatus := statusUpdates[resourceClaim]
+		if statusUpdates[resourceClaim] == nil {
+			resourceClaimStatus = resourceapply.ResourceClaimStatus()
+			statusUpdates[resourceClaim] = resourceClaimStatus
+		}
 		// resourceClaim status for this specific device
 		resourceClaimStatusDevice := resourceapply.
 			AllocatedDeviceStatus().
@@ -196,22 +203,25 @@ func (np *NetworkDriver) RunPodSandbox(ctx context.Context, pod *api.PodSandbox)
 		}
 		// Ok
 		resourceClaimStatus.WithDevices(resourceClaimStatusDevice)
-		resourceClaimApply := resourceapply.ResourceClaim(config.Claim.Name, config.Claim.Namespace).WithStatus(resourceClaimStatus)
-		// do not block the handler to update the status
+	}
+	// do not block the handler to update the status
+	for claim, status := range statusUpdates {
+		resourceClaimApply := resourceapply.ResourceClaim(claim.Name, claim.Namespace).WithStatus(status)
 		go func() {
 			ctxStatus, cancel := context.WithTimeout(context.Background(), 3*time.Second)
 			defer cancel()
-			_, err = np.kubeClient.ResourceV1beta1().ResourceClaims(config.Claim.Namespace).ApplyStatus(ctxStatus,
+			_, err := np.kubeClient.ResourceV1beta1().ResourceClaims(claim.Namespace).ApplyStatus(ctxStatus,
 				resourceClaimApply,
 				metav1.ApplyOptions{FieldManager: np.driverName, Force: true},
 			)
 			if err != nil {
-				klog.Infof("failed to update status for claim %s/%s : %v", config.Claim.Namespace, config.Claim.Name, err)
+				klog.Infof("failed to update status for claim %s/%s : %v", claim.Namespace, claim.Name, err)
 			} else {
-				klog.V(4).Infof("update status for claim %s/%s", config.Claim.Namespace, config.Claim.Name)
+				klog.V(4).Infof("updated status for claim %s/%s", claim.Namespace, claim.Name)
 			}
 		}()
 	}
+
 	return nil
 }
 
diff --git a/tests/e2e.bats b/tests/e2e.bats
index 09e5fbd5..5d8ad27a 100644
--- a/tests/e2e.bats
+++ b/tests/e2e.bats
@@ -226,3 +226,32 @@ load 'test_helper/bats-assert/load'
   kubectl delete -f "$BATS_TEST_DIRNAME"/../examples/resourceclaim_disable_ebpf.yaml
   kubectl delete -f "$BATS_TEST_DIRNAME"/../examples/deviceclass.yaml
 }
+
+# Test case for validating multiple devices allocated to the same pod.
+@test "2 dummy interfaces with IP addresses ResourceClaimTemplate" {
+  docker exec "$CLUSTER_NAME"-worker bash -c "ip link add dummy6 type dummy"
+  docker exec "$CLUSTER_NAME"-worker bash -c "ip link set up dev dummy6"
+  docker exec "$CLUSTER_NAME"-worker bash -c "ip addr add 169.254.169.13/32 dev dummy6"
+
+  docker exec "$CLUSTER_NAME"-worker bash -c "ip link add dummy7 type dummy"
+  docker exec "$CLUSTER_NAME"-worker bash -c "ip link set up dev dummy7"
+  docker exec "$CLUSTER_NAME"-worker bash -c "ip addr add 169.254.169.14/32 dev dummy7"
+
+  kubectl apply -f "$BATS_TEST_DIRNAME"/../examples/deviceclass.yaml
+  kubectl apply -f "$BATS_TEST_DIRNAME"/../examples/resourceclaimtemplate_double.yaml
+  kubectl wait --timeout=30s --for=condition=ready pods -l app=MyApp
+  POD_NAME=$(kubectl get pods -l app=MyApp -o name)
+  run kubectl exec $POD_NAME -- ip addr show dummy6
+  assert_success
+  assert_output --partial "169.254.169.13"
+  run kubectl exec $POD_NAME -- ip addr show dummy7
+  assert_success
+  assert_output --partial "169.254.169.14"
+  run kubectl get resourceclaims -o=jsonpath='{.items[0].status.devices[*]}'
+  assert_success
+  assert_output --partial "169.254.169.13"
+  assert_output --partial "169.254.169.14"
+
+  kubectl delete -f "$BATS_TEST_DIRNAME"/../examples/deviceclass.yaml
+  kubectl delete -f "$BATS_TEST_DIRNAME"/../examples/resourceclaimtemplate_double.yaml
+}