Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions internal/k8s/cluster/eks.go
Original file line number Diff line number Diff line change
Expand Up @@ -693,13 +693,12 @@ func (p *EKSProvider) createNodeGroupWithAWSCLI(ctx context.Context, clusterName
}

func (p *EKSProvider) waitForNodeGroupActive(ctx context.Context, clusterName, nodeGroupName string) error {
timeout := 15 * time.Minute
deadline := time.Now().Add(timeout)
deadline := time.Now().Add(DefaultNodeGroupCreateTimeout)

for time.Now().Before(deadline) {
ng, err := p.describeNodeGroup(ctx, clusterName, nodeGroupName)
if err != nil {
time.Sleep(30 * time.Second)
time.Sleep(DefaultPollInterval)
continue
}

Expand All @@ -714,7 +713,7 @@ func (p *EKSProvider) waitForNodeGroupActive(ctx context.Context, clusterName, n
return fmt.Errorf("node group creation failed")
}

time.Sleep(30 * time.Second)
time.Sleep(DefaultPollInterval)
}

return fmt.Errorf("timeout waiting for node group to become active")
Expand Down Expand Up @@ -911,14 +910,14 @@ func (p *EKSProvider) deleteNodeGroup(ctx context.Context, clusterName, nodeGrou

func (p *EKSProvider) waitForClusterActive(ctx context.Context, clusterName, profile, region string, timeout time.Duration) error {
if timeout <= 0 {
timeout = 20 * time.Minute
timeout = DefaultClusterCreateTimeout
}

deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
cluster, err := p.describeCluster(ctx, clusterName)
if err != nil {
time.Sleep(30 * time.Second)
time.Sleep(DefaultPollInterval)
continue
}

Expand All @@ -933,15 +932,14 @@ func (p *EKSProvider) waitForClusterActive(ctx context.Context, clusterName, pro
return fmt.Errorf("cluster creation failed")
}

time.Sleep(30 * time.Second)
time.Sleep(DefaultPollInterval)
}

return fmt.Errorf("timeout waiting for cluster to become active")
}

func (p *EKSProvider) waitForNodeGroupDeleted(ctx context.Context, clusterName, nodeGroupName string) error {
timeout := 10 * time.Minute
deadline := time.Now().Add(timeout)
deadline := time.Now().Add(DefaultNodeGroupDeleteTimeout)

for time.Now().Before(deadline) {
_, err := p.describeNodeGroup(ctx, clusterName, nodeGroupName)
Expand All @@ -954,7 +952,7 @@ func (p *EKSProvider) waitForNodeGroupDeleted(ctx context.Context, clusterName,
fmt.Printf("[aws] waiting for node group %s deletion\n", nodeGroupName)
}

time.Sleep(30 * time.Second)
time.Sleep(DefaultPollInterval)
}

return fmt.Errorf("timeout waiting for node group deletion")
Expand Down
13 changes: 6 additions & 7 deletions internal/k8s/cluster/gke.go
Original file line number Diff line number Diff line change
Expand Up @@ -579,14 +579,14 @@ func (p *GKEProvider) listNodePools(ctx context.Context, clusterName string) ([]

func (p *GKEProvider) waitForClusterRunning(ctx context.Context, clusterName, project, region string, timeout time.Duration) error {
if timeout <= 0 {
timeout = 15 * time.Minute
timeout = DefaultClusterCreateTimeout
}

deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
cluster, err := p.describeCluster(ctx, clusterName)
if err != nil {
time.Sleep(30 * time.Second)
time.Sleep(DefaultPollInterval)
continue
}

Expand All @@ -601,20 +601,19 @@ func (p *GKEProvider) waitForClusterRunning(ctx context.Context, clusterName, pr
return fmt.Errorf("cluster creation failed with status: %s", cluster.Status)
}

time.Sleep(30 * time.Second)
time.Sleep(DefaultPollInterval)
}

return fmt.Errorf("timeout waiting for cluster to become running")
}

func (p *GKEProvider) waitForNodePoolRunning(ctx context.Context, clusterName, nodePoolName string) error {
timeout := 10 * time.Minute
deadline := time.Now().Add(timeout)
deadline := time.Now().Add(DefaultNodeGroupCreateTimeout)

for time.Now().Before(deadline) {
nodePools, err := p.listNodePools(ctx, clusterName)
if err != nil {
time.Sleep(30 * time.Second)
time.Sleep(DefaultPollInterval)
continue
}

Expand All @@ -634,7 +633,7 @@ func (p *GKEProvider) waitForNodePoolRunning(ctx context.Context, clusterName, n
}
}

time.Sleep(30 * time.Second)
time.Sleep(DefaultPollInterval)
}

return fmt.Errorf("timeout waiting for node pool to become running")
Expand Down
10 changes: 5 additions & 5 deletions internal/k8s/cluster/kubeadm.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ func (p *KubeadmProvider) Create(ctx context.Context, opts CreateOptions) (*Clus
}

// Wait for SSH to be available
if err := WaitForSSH(ctx, cpInstance.PublicIP, 22, 5*time.Minute); err != nil {
if err := WaitForSSH(ctx, cpInstance.PublicIP, 22, DefaultSSHConnectTimeout); err != nil {
_ = p.terminateInstance(ctx, cpInstance.InstanceID)
_ = p.deleteSecurityGroup(ctx, sgID)
return nil, fmt.Errorf("control plane SSH not available: %w", err)
Expand Down Expand Up @@ -209,7 +209,7 @@ func (p *KubeadmProvider) Create(ctx context.Context, opts CreateOptions) (*Clus
}

// Wait for SSH
if err := WaitForSSH(ctx, workerInstance.PublicIP, 22, 5*time.Minute); err != nil {
if err := WaitForSSH(ctx, workerInstance.PublicIP, 22, DefaultSSHConnectTimeout); err != nil {
// Continue anyway, will fail on bootstrap
if p.debug {
fmt.Printf("[kubeadm] warning: worker %d SSH not available: %v\n", i, err)
Expand Down Expand Up @@ -279,7 +279,7 @@ func (p *KubeadmProvider) Create(ctx context.Context, opts CreateOptions) (*Clus
fmt.Println("[kubeadm] waiting for nodes to be ready...")
}

if err := WaitForNodeReady(ctx, ssh, 5*time.Minute); err != nil {
if err := WaitForNodeReady(ctx, ssh, DefaultSSHConnectTimeout); err != nil {
if p.debug {
fmt.Printf("[kubeadm] warning: not all nodes ready: %v\n", err)
}
Expand Down Expand Up @@ -342,7 +342,7 @@ func (p *KubeadmProvider) Delete(ctx context.Context, clusterName string) error
}

// Wait for instances to terminate
time.Sleep(30 * time.Second)
time.Sleep(DefaultPollInterval)

// Delete security group
sgID, err := p.findSecurityGroup(ctx, clusterName)
Expand Down Expand Up @@ -944,7 +944,7 @@ func (p *KubeadmProvider) scaleUp(ctx context.Context, cluster *ClusterInfo, cou
}

// Wait for SSH
if err := WaitForSSH(ctx, instance.PublicIP, 22, 5*time.Minute); err != nil {
if err := WaitForSSH(ctx, instance.PublicIP, 22, DefaultSSHConnectTimeout); err != nil {
continue
}

Expand Down
24 changes: 24 additions & 0 deletions internal/k8s/cluster/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,30 @@ const (
ClusterTypeExisting ClusterType = "existing"
)

// Standard timeout constants for cluster operations, shared by the
// EKS, GKE, and kubeadm providers so that all status-polling loops
// use a single set of tunable durations.
const (
	// DefaultClusterCreateTimeout is the default timeout for cluster creation.
	DefaultClusterCreateTimeout = 20 * time.Minute

	// DefaultNodeGroupCreateTimeout is the default timeout for node group creation.
	DefaultNodeGroupCreateTimeout = 15 * time.Minute

	// DefaultNodeGroupDeleteTimeout is the default timeout for node group deletion.
	DefaultNodeGroupDeleteTimeout = 10 * time.Minute

	// DefaultSSHConnectTimeout is the default timeout for an SSH connection
	// to a newly provisioned instance to become available.
	DefaultSSHConnectTimeout = 5 * time.Minute

	// DefaultNodeReadyTimeout is the default timeout for a node to become ready.
	DefaultNodeReadyTimeout = 5 * time.Minute

	// DefaultPollInterval is the standard polling interval between
	// successive status checks while waiting on the timeouts above.
	DefaultPollInterval = 30 * time.Second

	// DefaultCommandTimeout is the default timeout for CLI command execution.
	DefaultCommandTimeout = 2 * time.Minute
)

// NodeInfo contains information about a cluster node
type NodeInfo struct {
Name string `json:"name"`
Expand Down
Loading