From 2882cad7621fde2a0f52f0a8d54466a7666194d6 Mon Sep 17 00:00:00 2001 From: Sunyee Cai Date: Thu, 22 May 2025 00:38:43 +0000 Subject: [PATCH] Split apart cloudNetwork device attribute The initial attribute of "cloudNetwork" contains a string with both the network name and the project number, which may exceed the limitation of attribute size. This commit splits the "cloudNetwork" into two parts, as well as introduces provider-specific function to manage the attributes. --- pkg/cloudprovider/cloud.go | 10 ++++ pkg/cloudprovider/gce/gce.go | 21 +++++++ pkg/inventory/cloud.go | 16 ++++-- pkg/inventory/cloud_test.go | 107 ++++++++++++++++++----------------- pkg/inventory/db.go | 5 +- 5 files changed, 99 insertions(+), 60 deletions(-) diff --git a/pkg/cloudprovider/cloud.go b/pkg/cloudprovider/cloud.go index 2226bbbc..739a0c4e 100644 --- a/pkg/cloudprovider/cloud.go +++ b/pkg/cloudprovider/cloud.go @@ -19,6 +19,7 @@ package cloudprovider type CloudInstance struct { Name string Type string + Provider CloudProvider AcceleratorProtocol string Interfaces []NetworkInterface } @@ -30,3 +31,12 @@ type NetworkInterface struct { MTU int `json:"mtu,omitempty"` Network string `json:"network,omitempty"` } + +// CloudProvider represents the type of cloud provider. +type CloudProvider string + +const ( + CloudProviderGCE CloudProvider = "GCE" + CloudProviderAWS CloudProvider = "AWS" + CloudProviderAzure CloudProvider = "Azure" +) diff --git a/pkg/cloudprovider/gce/gce.go b/pkg/cloudprovider/gce/gce.go index 07122acb..1828cb8b 100644 --- a/pkg/cloudprovider/gce/gce.go +++ b/pkg/cloudprovider/gce/gce.go @@ -19,6 +19,7 @@ package gce import ( "context" "encoding/json" + "fmt" "time" "cloud.google.com/go/compute/metadata" @@ -27,6 +28,7 @@ import ( "k8s.io/klog/v2" "github.com/google/dranet/pkg/cloudprovider" + resourceapi "k8s.io/api/resource/v1beta1" ) // GPUDirectSupport represents the type of GPUDirect support for a given machine type. @@ -57,6 +59,7 @@ var ( // GPUDirect-RDMA: one HPC VPC, one subnet per NIC, 8896MTU ) +// GetInstance retrieves GCE instance properties by querying the metadata server. func GetInstance(ctx context.Context) (*cloudprovider.CloudInstance, error) { var instance *cloudprovider.CloudInstance // metadata server can not be available during startup @@ -84,6 +87,7 @@ func GetInstance(ctx context.Context) (*cloudprovider.CloudInstance, error) { instance = &cloudprovider.CloudInstance{ Name: instanceName, Type: instanceType, + Provider: cloudprovider.CloudProviderGCE, AcceleratorProtocol: string(protocol), } if err = json.Unmarshal([]byte(gceInterfacesRaw), &instance.Interfaces); err != nil { @@ -97,3 +101,20 @@ func GetInstance(ctx context.Context) (*cloudprovider.CloudInstance, error) { } return instance, nil } + +// GetGCEAttributes fetches all attributes related to the provided GCP network. +func GetGCEAttributes(network string) map[resourceapi.QualifiedName]resourceapi.DeviceAttribute { + attributes := make(map[resourceapi.QualifiedName]resourceapi.DeviceAttribute) + var projectNumber int64 + var name string + // Use custom parsing because the network path is + // different from the format expected by k8s-cloud-provider + _, err := fmt.Sscanf(network, "projects/%d/networks/%s", &projectNumber, &name) + if err != nil { + klog.Warningf("Error parsing network %q : %v", network, err) + return nil + } + attributes["gce.dra.net/networkName"] = resourceapi.DeviceAttribute{StringValue: &name} + attributes["gce.dra.net/networkProjectNumber"] = resourceapi.DeviceAttribute{IntValue: &projectNumber} + return attributes +} diff --git a/pkg/inventory/cloud.go b/pkg/inventory/cloud.go index b5001ee3..4b3d74d7 100644 --- a/pkg/inventory/cloud.go +++ b/pkg/inventory/cloud.go @@ -26,6 +26,7 @@ import ( "github.com/google/dranet/pkg/cloudprovider" "github.com/google/dranet/pkg/cloudprovider/gce" + resourceapi "k8s.io/api/resource/v1beta1" ) // getInstanceProperties get the instace properties and stores them in a global variable to be used in discovery @@ -46,16 +47,23 @@ func getInstanceProperties(ctx context.Context) *cloudprovider.CloudInstance { return instance } -func cloudNetwork(mac string, instance *cloudprovider.CloudInstance) string { +// getProviderAttributes retrieves cloud provider-specific attributes for a network interface +func getProviderAttributes(mac string, instance *cloudprovider.CloudInstance) map[resourceapi.QualifiedName]resourceapi.DeviceAttribute { if instance == nil { - return "" + klog.Warningf("instance metadata is nil, cannot get provider attributes.") + return nil + } + if instance.Provider != cloudprovider.CloudProviderGCE { + klog.Warningf("cloud provider %q is not supported", instance.Provider) + return nil } for _, cloudInterface := range instance.Interfaces { if cloudInterface.Mac == mac { - return getLastSegmentAndTruncate(cloudInterface.Network, 64) // max size for an attribute value + return gce.GetGCEAttributes(cloudInterface.Network) } } - return "" + klog.Warningf("no matching cloud interface found for mac %s", mac) + return nil } // getLastSegmentAndTruncate extracts the last segment from a path diff --git a/pkg/inventory/cloud_test.go b/pkg/inventory/cloud_test.go index d54df01c..84971f98 100644 --- a/pkg/inventory/cloud_test.go +++ b/pkg/inventory/cloud_test.go @@ -17,90 +17,91 @@ limitations under the License. package inventory import ( + "github.com/google/go-cmp/cmp" "testing" "github.com/google/dranet/pkg/cloudprovider" + resourceapi "k8s.io/api/resource/v1beta1" + "k8s.io/utils/ptr" ) -func Test_cloudNetwork(t *testing.T) { - type args struct { +func TestGetProviderAttributes(t *testing.T) { + tests := []struct { + name string mac string instance *cloudprovider.CloudInstance - } - tests := []struct { - name string - args args - want string + want map[resourceapi.QualifiedName]resourceapi.DeviceAttribute }{ { - name: "nil instance", - args: args{ - mac: "00:11:22:33:44:55", - instance: nil, - }, - want: "", + name: "nil instance", + mac: "00:11:22:33:44:55", + instance: nil, + want: nil, }, { - name: "empty interfaces", - args: args{ - mac: "00:11:22:33:44:55", - instance: &cloudprovider.CloudInstance{ - Interfaces: []cloudprovider.NetworkInterface{}, - }, + name: "instance with no interfaces", + mac: "00:11:22:33:44:55", + instance: &cloudprovider.CloudInstance{ + Provider: cloudprovider.CloudProviderGCE, + Interfaces: []cloudprovider.NetworkInterface{}, }, - want: "", + want: nil, }, { - name: "mac match", - args: args{ - mac: "00:11:22:33:44:55", - instance: &cloudprovider.CloudInstance{ - Interfaces: []cloudprovider.NetworkInterface{ - {Mac: "aa:bb:cc:dd:ee:ff", Network: "/projects/proj1/global/networks/net1"}, - {Mac: "00:11:22:33:44:55", Network: "/projects/proj2/global/networks/my-super-long-network-name-that-will-be-truncated-and-then-some-more"}, - }, + name: "MAC not found in instance interfaces", + mac: "00:11:22:33:44:FF", // MAC that won't be found + instance: &cloudprovider.CloudInstance{ + Provider: cloudprovider.CloudProviderGCE, + Interfaces: []cloudprovider.NetworkInterface{ + {Mac: "00:11:22:33:44:55", Network: "projects/12345/networks/test-network"}, }, }, - want: "my-super-long-network-name-that-will-be-truncated-and-then-some-", // Expected 64 char truncation + want: nil, }, { - name: "mac no match", - args: args{ - mac: "11:22:33:44:55:66", - instance: &cloudprovider.CloudInstance{ - Interfaces: []cloudprovider.NetworkInterface{ - {Mac: "aa:bb:cc:dd:ee:ff", Network: "/projects/proj1/global/networks/net1"}, - {Mac: "00:11:22:33:44:55", Network: "/projects/proj2/global/networks/net2"}, - }, + name: "GCE provider, MAC found, valid network", + mac: "00:11:22:33:44:55", + instance: &cloudprovider.CloudInstance{ + Provider: cloudprovider.CloudProviderGCE, + Interfaces: []cloudprovider.NetworkInterface{ + {Mac: "00:11:22:33:44:55", Network: "projects/12345/networks/test-network"}, + {Mac: "AA:BB:CC:DD:EE:FF", Network: "projects/67890/networks/other-network"}, }, }, - want: "", + want: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{ + "gce.dra.net/networkName": {StringValue: ptr.To("test-network")}, + "gce.dra.net/networkProjectNumber": {IntValue: ptr.To(int64(12345))}, + }, }, { - name: "mac match, network needs no truncation", - args: args{ - mac: "aa:bb:cc:dd:ee:ff", - instance: &cloudprovider.CloudInstance{ - Interfaces: []cloudprovider.NetworkInterface{ - {Mac: "aa:bb:cc:dd:ee:ff", Network: "global/networks/short-net"}, - }, + name: "GCE provider, MAC found, invalid network string for GCE parsing", + mac: "00:11:22:33:44:55", + instance: &cloudprovider.CloudInstance{ + Provider: cloudprovider.CloudProviderGCE, + Interfaces: []cloudprovider.NetworkInterface{ + {Mac: "00:11:22:33:44:55", Network: "invalid-gce-network-string"}, }, }, - want: "short-net", + want: nil, // gce.GetGCEAttributes returns nil for invalid network string }, { - name: "mac match, empty network string", - args: args{ - mac: "00:11:22:33:44:55", - instance: &cloudprovider.CloudInstance{Interfaces: []cloudprovider.NetworkInterface{{Mac: "00:11:22:33:44:55", Network: ""}}}, + name: "Unsupported provider, MAC found", + mac: "00:11:22:33:44:55", + instance: &cloudprovider.CloudInstance{ + Provider: cloudprovider.CloudProviderAWS, // Unsupported provider + Interfaces: []cloudprovider.NetworkInterface{ + {Mac: "00:11:22:33:44:55", Network: "aws-network-info"}, + }, }, - want: "", + want: nil, }, } + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := cloudNetwork(tt.args.mac, tt.args.instance); got != tt.want { - t.Errorf("cloudNetwork() = %v, want %v", got, tt.want) + got := getProviderAttributes(tt.mac, tt.instance) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("getProviderAttributes() got = %v, want %v", got, tt.want) } }) } diff --git a/pkg/inventory/db.go b/pkg/inventory/db.go index 5a22128a..0a603e34 100644 --- a/pkg/inventory/db.go +++ b/pkg/inventory/db.go @@ -271,9 +271,8 @@ func (db *DB) netdevToDRAdev(ifName string) (*resourceapi.Device, error) { } mac := link.Attrs().HardwareAddr.String() - // this is bounded and small number O(N) is ok - if network := cloudNetwork(mac, db.instance); network != "" { - device.Basic.Attributes["dra.net/cloudNetwork"] = resourceapi.DeviceAttribute{StringValue: &network} + for name, attribute := range getProviderAttributes(mac, db.instance) { + device.Basic.Attributes[name] = attribute } return &device, nil