From 50c31024e2dbf11cacf97e05c816ac0c86897534 Mon Sep 17 00:00:00 2001
From: Carlos Eduardo Arango Gutierrez
Date: Fri, 13 Feb 2026 11:29:40 +0100
Subject: [PATCH] fix(aws): propagate image architecture in cluster mode

Pass Image.Architecture to resolveImageForNode instead of empty string.
Previously, cluster mode always defaulted to x86_64 regardless of the
user's image architecture specification.

Also add documentation for arm64 instance type alternatives in the
API type comments.

Signed-off-by: Carlos Eduardo Arango Gutierrez
---
 api/holodeck/v1alpha1/types.go | 8 ++++----
 pkg/provider/aws/cluster.go    | 7 ++++++-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/api/holodeck/v1alpha1/types.go b/api/holodeck/v1alpha1/types.go
index 7e9168585..0a3d5080e 100644
--- a/api/holodeck/v1alpha1/types.go
+++ b/api/holodeck/v1alpha1/types.go
@@ -185,10 +185,10 @@ type ControlPlaneSpec struct {
 	Count int32 `json:"count"`
 
 	// InstanceType specifies the EC2 instance type for control-plane nodes.
+	// Default is "m5.xlarge" (x86_64). For arm64, use Graviton types
+	// (e.g., "m7g.xlarge", "c7g.xlarge").
 	// +kubebuilder:default="m5.xlarge"
 	// +optional
-	// +optional
-
 	InstanceType string `json:"instanceType,omitempty"`
 
 	// OS specifies the operating system by ID (e.g., "ubuntu-22.04").
@@ -236,10 +236,10 @@ type WorkerPoolSpec struct {
 
 	// InstanceType specifies the EC2 instance type for worker nodes.
 	// For GPU workloads, use GPU instance types (g4dn, p4d, etc.).
+	// Default is "g4dn.xlarge" (x86_64). For arm64 GPU workloads,
+	// use "g5g.xlarge" or similar Graviton GPU instances.
 	// +kubebuilder:default="g4dn.xlarge"
 	// +optional
-	// +optional
-
 	InstanceType string `json:"instanceType,omitempty"`
 
 	// OS specifies the operating system by ID (e.g., "ubuntu-22.04").
diff --git a/pkg/provider/aws/cluster.go b/pkg/provider/aws/cluster.go
index a4792a6ac..504f487fe 100644
--- a/pkg/provider/aws/cluster.go
+++ b/pkg/provider/aws/cluster.go
@@ -396,7 +396,12 @@ func (p *Provider) createInstances(
 	image *v1alpha1.Image,
 ) ([]InstanceInfo, error) {
 	// Resolve AMI for this node pool
-	resolved, err := p.resolveImageForNode(os, image, "")
+	// Determine architecture from image spec
+	var arch string
+	if image != nil && image.Architecture != "" {
+		arch = image.Architecture
+	}
+	resolved, err := p.resolveImageForNode(os, image, arch)
 	if err != nil {
 		return nil, fmt.Errorf("error resolving AMI: %w", err)
 	}