diff --git a/pkg/cleanup/cleanup.go b/pkg/cleanup/cleanup.go index 60b99015..d221e7ab 100644 --- a/pkg/cleanup/cleanup.go +++ b/pkg/cleanup/cleanup.go @@ -29,6 +29,7 @@ import ( "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/service/ec2" "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2" internalaws "github.com/NVIDIA/holodeck/internal/aws" "github.com/NVIDIA/holodeck/internal/logger" @@ -60,8 +61,9 @@ func safeString(s *string) string { // Cleaner handles cleanup of AWS resources type Cleaner struct { - ec2 internalaws.EC2Client - log *logger.FunLogger + ec2 internalaws.EC2Client + elbv2 internalaws.ELBv2Client + log *logger.FunLogger } // CleanerOption is a functional option for configuring the Cleaner. @@ -75,6 +77,14 @@ func WithEC2Client(client internalaws.EC2Client) CleanerOption { } } +// WithELBv2Client sets a custom ELBv2 client for the Cleaner. +// This is primarily used for testing to inject mock clients. +func WithELBv2Client(client internalaws.ELBv2Client) CleanerOption { + return func(c *Cleaner) { + c.elbv2 = client + } +} + // New creates a new AWS resource cleaner. // Optional functional options can be provided to customize the cleaner, // such as injecting a mock EC2 client for testing. @@ -89,8 +99,8 @@ func New(log *logger.FunLogger, region string, opt(c) } - // If no EC2 client was injected, create the real one - if c.ec2 == nil { + // If no clients were injected, create real ones from shared config + if c.ec2 == nil || c.elbv2 == nil { // Use Background here because New is a top-level initializer // without caller-provided context. cfg, err := config.LoadDefaultConfig(context.Background(), @@ -98,7 +108,12 @@ func New(log *logger.FunLogger, region string, if err != nil { return nil, fmt.Errorf("failed to load AWS config: %w", err) } - c.ec2 = ec2.NewFromConfig(cfg) + if c.ec2 == nil { + c.ec2 = ec2.NewFromConfig(cfg) + } + if c.elbv2 == nil { + c.elbv2 = elasticloadbalancingv2.NewFromConfig(cfg) + } } return c, nil @@ -240,6 +255,15 @@ func (c *Cleaner) DeleteVPCResources(ctx context.Context, vpcID string) error { return fmt.Errorf("cleanup cancelled: %w", err) } + // Delete load balancers (NLBs/ALBs) — must happen before subnets/IGW/VPC + if err := c.deleteLoadBalancers(ctx, vpcID); err != nil { + c.log.Warning("Failed to delete load balancers: %v", err) + } + + if err := ctx.Err(); err != nil { + return fmt.Errorf("cleanup cancelled after load balancer deletion: %w", err) + } + // Delete instances if err := c.deleteInstances(ctx, vpcID); err != nil { return fmt.Errorf("failed to delete instances: %w", err) @@ -619,3 +643,94 @@ func (c *Cleaner) deleteVPC(ctx context.Context, vpcID string) error { return fmt.Errorf("failed to delete VPC %s after %d attempts", vpcID, maxAttempts) } + +func (c *Cleaner) deleteLoadBalancers(ctx context.Context, vpcID string) error { + // Describe all load balancers and filter by VPC + describeInput := &elasticloadbalancingv2.DescribeLoadBalancersInput{} + result, err := c.elbv2.DescribeLoadBalancers(ctx, describeInput) + if err != nil { + return fmt.Errorf("failed to describe load balancers: %w", err) + } + + var count int + for _, lb := range result.LoadBalancers { + if lb.VpcId == nil || *lb.VpcId != vpcID { + continue + } + + lbArn := aws.ToString(lb.LoadBalancerArn) + lbName := aws.ToString(lb.LoadBalancerName) + c.log.Info("Deleting load balancer %s (%s)", lbName, lbArn) + + // Delete listeners + if err := c.deleteLBListeners(ctx, lbArn); err != nil { + c.log.Warning("Failed to delete listeners for %s: %v", lbName, err) + } + + // Delete target groups + if err := c.deleteLBTargetGroups(ctx, lbArn); err != nil { + c.log.Warning("Failed to delete target groups for %s: %v", lbName, err) + } + + // Delete the load balancer + _, err := c.elbv2.DeleteLoadBalancer(ctx, &elasticloadbalancingv2.DeleteLoadBalancerInput{ + LoadBalancerArn: aws.String(lbArn), + }) + if err != nil { + c.log.Warning("Failed to delete load balancer %s: %v", lbName, err) + continue + } + count++ + } + + if count > 0 { + c.log.Info("Deleted %d load balancer(s), waiting for ENIs to detach", count) + // NLBs take time to fully decommission and release ENIs. + // Wait briefly to allow ENI cleanup before subnet deletion. + select { + case <-ctx.Done(): + return fmt.Errorf("cancelled waiting for LB decommission: %w", ctx.Err()) + case <-time.After(30 * time.Second): + } + } + + return nil +} + +func (c *Cleaner) deleteLBListeners(ctx context.Context, lbArn string) error { + result, err := c.elbv2.DescribeListeners(ctx, &elasticloadbalancingv2.DescribeListenersInput{ + LoadBalancerArn: aws.String(lbArn), + }) + if err != nil { + return fmt.Errorf("failed to describe listeners: %w", err) + } + + for _, listener := range result.Listeners { + _, err := c.elbv2.DeleteListener(ctx, &elasticloadbalancingv2.DeleteListenerInput{ + ListenerArn: listener.ListenerArn, + }) + if err != nil { + c.log.Warning("Failed to delete listener %s: %v", safeString(listener.ListenerArn), err) + } + } + return nil +} + +func (c *Cleaner) deleteLBTargetGroups(ctx context.Context, lbArn string) error { + result, err := c.elbv2.DescribeTargetGroups(ctx, &elasticloadbalancingv2.DescribeTargetGroupsInput{ + LoadBalancerArn: aws.String(lbArn), + }) + if err != nil { + return fmt.Errorf("failed to describe target groups: %w", err) + } + + for _, tg := range result.TargetGroups { + _, err := c.elbv2.DeleteTargetGroup(ctx, &elasticloadbalancingv2.DeleteTargetGroupInput{ + TargetGroupArn: tg.TargetGroupArn, + }) + if err != nil { + c.log.Warning("Failed to delete target group %s: %v", safeString(tg.TargetGroupArn), err) + } + } + return nil +} diff --git a/pkg/provider/aws/nlb.go b/pkg/provider/aws/nlb.go index 4d880e48..6bd4e85d 100644 --- a/pkg/provider/aws/nlb.go +++ b/pkg/provider/aws/nlb.go @@ -54,15 +54,16 @@ func (p *Provider) createNLB(cache *ClusterCache) error { } lbName := nlbBaseName + nlbSuffix - // Use the public subnet for the internet-facing NLB + // Use the public subnet for the internal NLB (same subnet as instances) subnetIDs := []string{cache.PublicSubnetid} - // Create load balancer + // Create load balancer — internal scheme avoids hairpin routing issues + // where nodes connecting to the NLB's public IP get i/o timeouts createLBInput := &elasticloadbalancingv2.CreateLoadBalancerInput{ Name: aws.String(lbName), Type: lbType, Subnets: subnetIDs, - Scheme: elbv2types.LoadBalancerSchemeEnumInternetFacing, + Scheme: elbv2types.LoadBalancerSchemeEnumInternal, IpAddressType: elbv2types.IpAddressTypeIpv4, Tags: p.convertTagsToELBv2Tags(), } diff --git a/pkg/provisioner/templates/kubeadm_cluster.go b/pkg/provisioner/templates/kubeadm_cluster.go index 582469af..ca12a0aa 100644 --- a/pkg/provisioner/templates/kubeadm_cluster.go +++ b/pkg/provisioner/templates/kubeadm_cluster.go @@ -153,23 +153,6 @@ fi # Initialize cluster if [[ ! -f /etc/kubernetes/admin.conf ]]; then - # Wait for control-plane endpoint to be resolvable (NLB DNS may take time) - if [[ "$CONTROL_PLANE_ENDPOINT" == *"elb.amazonaws.com"* ]] || \ - [[ "$CONTROL_PLANE_ENDPOINT" == *"amazonaws.com"* ]]; then - holodeck_log "INFO" "$COMPONENT" "Waiting for NLB DNS to resolve: ${CONTROL_PLANE_ENDPOINT}" - for i in {1..30}; do - if host "${CONTROL_PLANE_ENDPOINT}" &>/dev/null || \ - getent hosts "${CONTROL_PLANE_ENDPOINT}" &>/dev/null; then - holodeck_log "INFO" "$COMPONENT" "NLB DNS resolved successfully" - break - fi - if [[ $i -eq 30 ]]; then - holodeck_log "WARN" "$COMPONENT" "NLB DNS not yet resolved after 5 min, proceeding anyway" - fi - sleep 10 - done - fi - INIT_ARGS=( --kubernetes-version="${K8S_VERSION}" --pod-network-cidr=192.168.0.0/16 @@ -296,12 +279,10 @@ if [[ "$IS_HA" == "true" ]] && [[ "$INIT_ENDPOINT" != "$CONTROL_PLANE_ENDPOINT" sed "s|controlPlaneEndpoint: ${INIT_ESCAPED}:6443|controlPlaneEndpoint: ${CONTROL_PLANE_ENDPOINT}:6443|g" | \ sudo kubectl --kubeconfig=/etc/kubernetes/admin.conf apply -f - || \ holodeck_log "WARN" "$COMPONENT" "Could not update kubeadm-config, join may need manual endpoint" - # Update admin.conf kubeconfig to use the NLB - sudo sed -i "s|server: https://${INIT_ESCAPED}:6443|server: https://${CONTROL_PLANE_ENDPOINT}:6443|g" \ - /etc/kubernetes/admin.conf - # Re-copy the updated admin.conf to user kubeconfig - sudo cp -f /etc/kubernetes/admin.conf "$HOME/.kube/config" - sudo chown "$(id -u):$(id -g)" "$HOME/.kube/config" + # NOTE: Do NOT patch admin.conf to use the NLB endpoint. + # CP nodes must use their local API server (localhost:6443) to avoid + # AWS NLB hairpin routing — NLBs drop traffic when a registered target + # connects through the NLB and gets routed back to itself. fi # Label this node as control-plane (keep the taint for multinode) @@ -388,6 +369,11 @@ holodeck_progress "$COMPONENT" 4 4 "Configuring node" # Setup kubeconfig for control-plane nodes if [[ "$IS_CONTROL_PLANE" == "true" ]]; then + # Patch admin.conf to use the local API server instead of the NLB endpoint. + # AWS NLBs drop hairpin traffic (target connects through NLB back to itself), + # so CP nodes must talk to their own local kube-apiserver. + sudo sed -i 's|server: https://.*:6443|server: https://localhost:6443|g' \ + /etc/kubernetes/admin.conf mkdir -p "$HOME/.kube" sudo cp -f /etc/kubernetes/admin.conf "$HOME/.kube/config" sudo chown "$(id -u):$(id -g)" "$HOME/.kube/config"