Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 13 additions & 19 deletions pkg/client/sdk.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,7 @@ func (c *SdkStackitClient) CreateServer(ctx context.Context, projectID, region s
}

// Convert SDK server to our Server type
server := &Server{
ID: sdkServer.GetId(),
Name: sdkServer.GetName(),
Status: sdkServer.GetStatus(),
Labels: convertLabelsFromSDK(sdkServer.Labels),
}
server := convertSDKServerToServer(sdkServer)

return server, nil
}
Expand All @@ -228,12 +223,7 @@ func (c *SdkStackitClient) GetServer(ctx context.Context, projectID, region, ser
}

// Convert SDK server to our Server type
server := &Server{
ID: sdkServer.GetId(),
Name: sdkServer.GetName(),
Status: sdkServer.GetStatus(),
Labels: convertLabelsFromSDK(sdkServer.Labels),
}
server := convertSDKServerToServer(sdkServer)

return server, nil
}
Expand Down Expand Up @@ -282,13 +272,7 @@ func (c *SdkStackitClient) ListServers(ctx context.Context, projectID, region st
if sdkResponse.Items != nil {
for i := range *sdkResponse.Items {
sdkServer := &(*sdkResponse.Items)[i]

server := &Server{
ID: sdkServer.GetId(),
Name: sdkServer.GetName(),
Status: sdkServer.GetStatus(),
Labels: convertLabelsFromSDK(sdkServer.Labels),
}
server := convertSDKServerToServer(sdkServer)
servers = append(servers, server)
}
}
Expand Down Expand Up @@ -358,6 +342,16 @@ func convertSDKNICtoNIC(nic *iaas.NIC) *NIC {
}
}

func convertSDKServerToServer(sdkServer *iaas.Server) *Server {
return &Server{
ID: sdkServer.GetId(),
Name: sdkServer.GetName(),
Status: sdkServer.GetStatus(),
ErrorMessage: sdkServer.GetErrorMessage(),
Labels: convertLabelsFromSDK(sdkServer.Labels),
}
}

// isNotFoundError checks if an error is a 404 Not Found error from the SDK
func isNotFoundError(err error) bool {
if err == nil {
Expand Down
9 changes: 5 additions & 4 deletions pkg/client/stackit.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,11 @@ type AgentRequest struct {

// Server represents a STACKIT server response
type Server struct {
ID string `json:"id"`
Name string `json:"name"`
Status string `json:"status"`
Labels map[string]string `json:"labels,omitempty"`
ID string `json:"id"`
Name string `json:"name"`
Status string `json:"status"`
ErrorMessage string `json:"errorMessage,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
}

// NIC represents a STACKIT network interface
Expand Down
31 changes: 31 additions & 0 deletions pkg/provider/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/stackitcloud/machine-controller-manager-provider-stackit/pkg/client"
api "github.com/stackitcloud/machine-controller-manager-provider-stackit/pkg/provider/apis"
"github.com/stackitcloud/machine-controller-manager-provider-stackit/pkg/provider/apis/validation"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
)

Expand Down Expand Up @@ -72,10 +73,21 @@ func (p *Provider) CreateMachine(ctx context.Context, req *driver.CreateMachineR
server, err = p.client.CreateServer(ctx, projectID, providerSpec.Region, p.createServerRequest(req, providerSpec))
if err != nil {
klog.Errorf("Failed to create server for machine %q: %v", req.Machine.Name, err)
if isResourceExhaustedError(err) {
return nil, status.Error(codes.ResourceExhausted, fmt.Sprintf("failed to create server: %v", err))
}
return nil, status.Error(codes.Unavailable, fmt.Sprintf("failed to create server: %v", err))
}
}

if err := p.WaitUntilServerRunning(ctx, projectID, providerSpec.Region, server.ID); err != nil {
klog.Errorf("Failed waiting for server %q to reach ACTIVE state: %v", req.Machine.Name, err)
if isResourceExhaustedError(err) {
return nil, status.Error(codes.ResourceExhausted, fmt.Sprintf("failed waiting for server to be ACTIVE: %v", err))
}
return nil, status.Error(codes.DeadlineExceeded, fmt.Sprintf("failed waiting for server to be ACTIVE: %v", err))
}

if err := p.patchNetworkInterface(ctx, projectID, server.ID, providerSpec); err != nil {
klog.Errorf("Failed to patch network interface for server %q: %v", req.Machine.Name, err)
return nil, status.Error(codes.Unavailable, fmt.Sprintf("failed to patch network interface for server: %v", err))
Expand Down Expand Up @@ -265,3 +277,22 @@ func (p *Provider) patchNetworkInterface(ctx context.Context, projectID, serverI

return nil
}

func (p *Provider) WaitUntilServerRunning(ctx context.Context, projectID, region, serverID string) error {
return wait.PollUntilContextTimeout(ctx, p.pollingInterval, p.pollingTimeout, true, func(ctx context.Context) (bool, error) {
server, err := p.client.GetServer(ctx, projectID, region, serverID)
if err != nil {
return false, err
}

switch server.Status {
case "ACTIVE":
klog.V(2).Infof("Server %q reached ACTIVE state", serverID)
return true, nil
case "ERROR":
return false, fmt.Errorf("server in ERROR state: %q", server.ErrorMessage)
}

return false, nil
})
}
66 changes: 65 additions & 1 deletion pkg/provider/create_basic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package provider
import (
"context"
"fmt"
"time"

"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
"github.com/gardener/machine-controller-manager/pkg/util/provider/driver"
Expand Down Expand Up @@ -33,7 +34,9 @@ var _ = Describe("CreateMachine", func() {
ctx = context.Background()
mockClient = &mock.StackitClient{}
provider = &Provider{
client: mockClient,
client: mockClient,
pollingInterval: 10 * time.Millisecond,
pollingTimeout: 5 * time.Second,
}

secret = &corev1.Secret{
Expand Down Expand Up @@ -114,6 +117,41 @@ var _ = Describe("CreateMachine", func() {
Expect(capturedReq.MachineType).To(Equal("c2i.2"))
Expect(capturedReq.ImageID).To(Equal("12345678-1234-1234-1234-123456789abc"))
})

It("should poll GetServer until server is ACTIVE", func() {
getServerCallCount := 0

mockClient.CreateServerFunc = func(_ context.Context, _, _ string, req *client.CreateServerRequest) (*client.Server, error) {
return &client.Server{
ID: "550e8400-e29b-41d4-a716-446655440000",
Name: req.Name,
Status: "CREATING",
}, nil
}
mockClient.GetServerFunc = func(_ context.Context, _, _, _ string) (*client.Server, error) {
getServerCallCount++
// First call returns CREATING, second call returns ACTIVE
if getServerCallCount == 1 {
return &client.Server{
ID: "550e8400-e29b-41d4-a716-446655440000",
Name: "test-machine",
Status: "CREATING",
}, nil
}
return &client.Server{
ID: "550e8400-e29b-41d4-a716-446655440000",
Name: "test-machine",
Status: "ACTIVE",
}, nil
}

resp, err := provider.CreateMachine(ctx, req)

Expect(err).NotTo(HaveOccurred())
Expect(resp).NotTo(BeNil())
Expect(resp.ProviderID).To(Equal("stackit://11111111-2222-3333-4444-555555555555/550e8400-e29b-41d4-a716-446655440000"))
Expect(getServerCallCount).To(BeNumerically(">=", 2))
})
})

Context("with invalid ProviderSpec", func() {
Expand Down Expand Up @@ -185,5 +223,31 @@ var _ = Describe("CreateMachine", func() {
Expect(ok).To(BeTrue())
Expect(statusErr.Code()).To(Equal(codes.Unavailable))
})

It("should return ResourceExhausted when server enters ERROR state with 'no valid host'", func() {
mockClient.CreateServerFunc = func(_ context.Context, _, _ string, req *client.CreateServerRequest) (*client.Server, error) {
return &client.Server{
ID: "550e8400-e29b-41d4-a716-446655440000",
Name: req.Name,
Status: "CREATING",
}, nil
}
mockClient.GetServerFunc = func(_ context.Context, _, _, _ string) (*client.Server, error) {
return &client.Server{
ID: "550e8400-e29b-41d4-a716-446655440000",
Name: "test-machine",
Status: "ERROR",
ErrorMessage: "No valid host was found. There are not enough hosts available.",
}, nil
}

_, err := provider.CreateMachine(ctx, req)

Expect(err).To(HaveOccurred())
statusErr, ok := status.FromError(err)
Expect(ok).To(BeTrue())
Expect(statusErr.Code()).To(Equal(codes.ResourceExhausted))
Expect(err.Error()).To(ContainSubstring("No valid host"))
})
})
})
23 changes: 21 additions & 2 deletions pkg/provider/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes"
"github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/status"
"github.com/stackitcloud/machine-controller-manager-provider-stackit/pkg/client"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
)

Expand All @@ -31,7 +32,7 @@ func (p *Provider) DeleteMachine(ctx context.Context, req *driver.DeleteMachineR

// Initialize client on first use (lazy initialization)
if err := p.ensureClient(serviceAccountKey); err != nil {
return nil, status.Error(codes.Internal, fmt.Sprintf("failed to initialize STACKIT client: %v", err))
return nil, status.Error(codes.Unauthenticated, fmt.Sprintf("failed to initialize STACKIT client: %v", err))
}

var projectID, serverID string
Expand Down Expand Up @@ -87,7 +88,25 @@ func (p *Provider) DeleteMachine(ctx context.Context, req *driver.DeleteMachineR
return nil, status.Error(codes.Internal, fmt.Sprintf("failed to delete server: %v", err))
}

klog.V(2).Infof("Successfully deleted server %q for machine %q", serverID, req.Machine.Name)
if err := p.WaitUntilServerDeleted(ctx, projectID, providerSpec.Region, serverID); err != nil {
klog.Errorf("Failed waiting for server %q to be deleted for machine %q: %v", serverID, req.Machine.Name, err)
return nil, status.Error(codes.DeadlineExceeded, fmt.Sprintf("failed waiting for server to be deleted: %v", err))
}

return &driver.DeleteMachineResponse{}, nil
}

func (p *Provider) WaitUntilServerDeleted(ctx context.Context, projectID, region, serverID string) error {
return wait.PollUntilContextTimeout(ctx, p.pollingInterval, p.pollingTimeout, true, func(ctx context.Context) (bool, error) {
_, err := p.client.GetServer(ctx, projectID, region, serverID)
if err != nil {
// Server is deleted if we get a not found error
if errors.Is(err, client.ErrServerNotFound) {
klog.V(2).Infof("Server %q has been deleted", serverID)
return true, nil
}
}

return false, err
})
}
47 changes: 46 additions & 1 deletion pkg/provider/delete_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package provider
import (
"context"
"fmt"
"time"

"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
"github.com/gardener/machine-controller-manager/pkg/util/provider/driver"
Expand Down Expand Up @@ -33,7 +34,9 @@ var _ = Describe("DeleteMachine", func() {
ctx = context.Background()
mockClient = &mock.StackitClient{}
provider = &Provider{
client: mockClient,
client: mockClient,
pollingInterval: 10 * time.Millisecond,
pollingTimeout: 5 * time.Second,
}

// Create secret with projectId
Expand Down Expand Up @@ -87,6 +90,9 @@ var _ = Describe("DeleteMachine", func() {
mockClient.DeleteServerFunc = func(_ context.Context, _, _, _ string) error {
return nil
}
mockClient.GetServerFunc = func(_ context.Context, _, _, _ string) (*client.Server, error) {
return nil, fmt.Errorf("%w: status 404", client.ErrServerNotFound)
}

resp, err := provider.DeleteMachine(ctx, req)

Expand All @@ -103,19 +109,58 @@ var _ = Describe("DeleteMachine", func() {
capturedServerID = serverID
return nil
}
mockClient.GetServerFunc = func(_ context.Context, _, _, _ string) (*client.Server, error) {
return nil, fmt.Errorf("%w: status 404", client.ErrServerNotFound)
}

_, err := provider.DeleteMachine(ctx, req)

Expect(err).NotTo(HaveOccurred())
Expect(capturedProjectID).To(Equal("11111111-2222-3333-4444-555555555555"))
Expect(capturedServerID).To(Equal("550e8400-e29b-41d4-a716-446655440000"))
})

It("should poll GetServer until server is deleted", func() {
getServerCallCount := 0

mockClient.DeleteServerFunc = func(_ context.Context, _, _, _ string) error {
return nil
}
mockClient.GetServerFunc = func(_ context.Context, _, _, _ string) (*client.Server, error) {
getServerCallCount++
// First call returns server still exists, second call returns not found
if getServerCallCount == 1 {
return &client.Server{
ID: "550e8400-e29b-41d4-a716-446655440000",
Name: "test-machine",
Status: "SHUTTING_DOWN",
}, nil
}
return nil, fmt.Errorf("%w: status 404", client.ErrServerNotFound)
}

resp, err := provider.DeleteMachine(ctx, req)

Expect(err).NotTo(HaveOccurred())
Expect(resp).NotTo(BeNil())
Expect(getServerCallCount).To(BeNumerically(">=", 2))
})
})

Context("with missing or invalid ProviderID", func() {
It("should still delete the machine when ProviderID is missing", func() {
machine.Spec.ProviderID = ""

mockClient.GetServerFunc = func(_ context.Context, _, _, _ string) (*client.Server, error) {
return &client.Server{
ID: "550e8400-e29b-41d4-a716-446655440000",
Name: "test-machine",
}, nil
}
mockClient.DeleteServerFunc = func(_ context.Context, _, _, _ string) error {
return nil
}

_, err := provider.DeleteMachine(ctx, req)

Expect(err).ToNot(HaveOccurred())
Expand Down
5 changes: 5 additions & 0 deletions pkg/provider/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,8 @@ func extractSecretCredentials(secretData map[string][]byte) (projectID, serviceA
serviceAccountKey = string(secretData[validation.StackitServiceAccountKey])
return projectID, serviceAccountKey
}

func isResourceExhaustedError(err error) bool {
errMsg := strings.ToLower(err.Error())
return strings.Contains(errMsg, "no valid host") || strings.Contains(errMsg, "quota exceeded")
}
8 changes: 7 additions & 1 deletion pkg/provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package provider
import (
"fmt"
"sync"
"time"

"github.com/gardener/machine-controller-manager/pkg/util/provider/driver"
client2 "github.com/stackitcloud/machine-controller-manager-provider-stackit/pkg/client"
Expand All @@ -24,12 +25,17 @@ type Provider struct {
clientOnce sync.Once // Ensures client is initialized exactly once
clientErr error // Stores initialization error if any
capturedCredentials string // Service account key used for initialization (for defensive checks)
// intervals need to be configurable to speed up tests
pollingInterval time.Duration // Interval between polling attempts
pollingTimeout time.Duration // Maximum time to wait during polling
}

// NewProvider returns an empty provider object
func NewProvider(i spi.SessionProviderInterface) driver.Driver {
return &Provider{
SPI: i,
SPI: i,
pollingInterval: 5 * time.Second,
pollingTimeout: 10 * time.Minute,
}
}

Expand Down