Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions pkg/provider/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@ const (
SecurityGroupID string = "security-group-id"
InstanceID string = "instance-id"
PublicDnsName string = "public-dns-name"

// Cluster networking cache keys
PublicSubnetID string = "public-subnet-id"
NatGatewayID string = "nat-gateway-id"
PrivateRouteTable string = "private-route-table-id"
PublicRouteTable string = "public-route-table-id"
CPSecurityGroupID string = "cp-security-group-id"
WorkerSecurityGroupID string = "worker-security-group-id"
EIPAllocationID string = "eip-allocation-id"
IAMInstanceProfileArn string = "iam-instance-profile-arn"
Comment on lines +49 to +57
Copy link

Copilot AI Mar 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR description states 8 new cluster networking cache keys are persisted/restored, but in this change set private-route-table-id is not written in dumpCache()/updateStatus(), and there is no dedicated struct field for it (it is instead mapped onto RouteTable during unmarshal). Either update the implementation to truly support all 8 keys, or adjust the PR description to match what is actually persisted.

Copilot uses AI. Check for mistakes.
)

var (
Expand All @@ -71,6 +81,15 @@ type AWS struct {
SecurityGroupid string
Instanceid string
PublicDnsName string

// Cluster networking fields
PublicSubnetid string
NatGatewayid string
PublicRouteTable string
CPSecurityGroupid string
WorkerSecurityGroupid string
EIPAllocationid string
IAMInstanceProfileArn string
}

type Provider struct {
Expand Down Expand Up @@ -232,6 +251,22 @@ func (p *Provider) unmarsalCache() (*AWS, error) {
aws.Instanceid = p.Value
case PublicDnsName:
aws.PublicDnsName = p.Value
case PublicSubnetID:
aws.PublicSubnetid = p.Value
case NatGatewayID:
aws.NatGatewayid = p.Value
case PrivateRouteTable:
aws.RouteTable = p.Value
Comment on lines +258 to +259
Copy link

Copilot AI Mar 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PrivateRouteTable is introduced as a distinct cache key, but it is not persisted in dumpCache() and there is no dedicated struct field for it. Additionally, unmarsalCache() maps PrivateRouteTable into aws.RouteTable, which can silently overwrite the existing route-table-id value if both keys are present. Either remove PrivateRouteTable entirely (if RouteTable is meant to be the private route table), or add a separate PrivateRouteTable field and persist/restore it consistently (and update status/cache tests accordingly).

Suggested change
case PrivateRouteTable:
aws.RouteTable = p.Value

Copilot uses AI. Check for mistakes.
case PublicRouteTable:
aws.PublicRouteTable = p.Value
case CPSecurityGroupID:
aws.CPSecurityGroupid = p.Value
case WorkerSecurityGroupID:
aws.WorkerSecurityGroupid = p.Value
case EIPAllocationID:
aws.EIPAllocationid = p.Value
case IAMInstanceProfileArn:
aws.IAMInstanceProfileArn = p.Value
default:
// Ignore non AWS infra properties
continue
Expand Down
183 changes: 183 additions & 0 deletions pkg/provider/aws/cache_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
/*
* Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package aws

import (
"os"
"path/filepath"
"testing"

"github.com/NVIDIA/holodeck/api/holodeck/v1alpha1"
"github.com/NVIDIA/holodeck/internal/logger"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// TestCacheRoundTrip verifies that all AWS struct fields survive an
// updateStatus -> unmarsalCache round-trip. If a field is not persisted,
// delete will fail to clean up the corresponding resource.
func TestCacheRoundTrip(t *testing.T) {
tmpDir, err := os.MkdirTemp("", "cache-roundtrip-*")
if err != nil {
t.Fatalf("failed to create temp dir: %v", err)
}
defer func() { _ = os.RemoveAll(tmpDir) }()

cacheFile := filepath.Join(tmpDir, "cache.yaml")

env := v1alpha1.Environment{
ObjectMeta: metav1.ObjectMeta{Name: "test-env"},
Spec: v1alpha1.EnvironmentSpec{
Provider: v1alpha1.ProviderAWS,
Instance: v1alpha1.Instance{
Region: "us-west-2",
},
},
}

log := logger.NewLogger()
provider := &Provider{
cacheFile: cacheFile,
Environment: &env,
log: log,
}

original := &AWS{
Vpcid: "vpc-abc123",
Subnetid: "subnet-def456",
InternetGwid: "igw-ghi789",
InternetGatewayAttachment: "vpc-abc123",
RouteTable: "rtb-jkl012",
SecurityGroupid: "sg-mno345",
Instanceid: "i-pqr678",
PublicDnsName: "ec2-1-2-3-4.compute.amazonaws.com",
// New cluster networking fields
PublicSubnetid: "subnet-pub789",
NatGatewayid: "nat-abc123",
PublicRouteTable: "rtb-pub456",
CPSecurityGroupid: "sg-cp789",
WorkerSecurityGroupid: "sg-worker012",
EIPAllocationid: "eipalloc-345",
IAMInstanceProfileArn: "arn:aws:iam::123456789012:instance-profile/holodeck",
}

// Write cache via updateStatus
if err := provider.updateStatus(env, original, nil); err != nil {
t.Fatalf("updateStatus failed: %v", err)
}

// Read it back
restored, err := provider.unmarsalCache()
if err != nil {
t.Fatalf("unmarsalCache failed: %v", err)
}

// Verify every field survives the round-trip
assertions := []struct {
name string
got string
expected string
}{
{"Vpcid", restored.Vpcid, original.Vpcid},
{"Subnetid", restored.Subnetid, original.Subnetid},
{"InternetGwid", restored.InternetGwid, original.InternetGwid},
{"InternetGatewayAttachment", restored.InternetGatewayAttachment, original.InternetGatewayAttachment},
{"RouteTable", restored.RouteTable, original.RouteTable},
{"SecurityGroupid", restored.SecurityGroupid, original.SecurityGroupid},
{"Instanceid", restored.Instanceid, original.Instanceid},
{"PublicDnsName", restored.PublicDnsName, original.PublicDnsName},
{"PublicSubnetid", restored.PublicSubnetid, original.PublicSubnetid},
{"NatGatewayid", restored.NatGatewayid, original.NatGatewayid},
{"PublicRouteTable", restored.PublicRouteTable, original.PublicRouteTable},
{"CPSecurityGroupid", restored.CPSecurityGroupid, original.CPSecurityGroupid},
{"WorkerSecurityGroupid", restored.WorkerSecurityGroupid, original.WorkerSecurityGroupid},
{"EIPAllocationid", restored.EIPAllocationid, original.EIPAllocationid},
{"IAMInstanceProfileArn", restored.IAMInstanceProfileArn, original.IAMInstanceProfileArn},
}
Comment on lines +89 to +110
Copy link

Copilot AI Mar 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The round-trip test asserts only the struct fields, but it does not cover the newly added PrivateRouteTable cache key at all. Given PrivateRouteTable exists in aws.go, this test suite will not catch regressions where that key is dropped or incorrectly mapped; consider adding coverage for it (or removing the constant if it isn't intended to be supported).

Copilot uses AI. Check for mistakes.

for _, a := range assertions {
if a.got != a.expected {
t.Errorf("field %s: got %q, want %q", a.name, a.got, a.expected)
}
}
}

// TestCacheRoundTripSingleNode verifies backward compatibility: a cache
// with only the original single-node fields round-trips correctly.
func TestCacheRoundTripSingleNode(t *testing.T) {
tmpDir, err := os.MkdirTemp("", "cache-single-*")
if err != nil {
t.Fatalf("failed to create temp dir: %v", err)
}
defer func() { _ = os.RemoveAll(tmpDir) }()

cacheFile := filepath.Join(tmpDir, "cache.yaml")

env := v1alpha1.Environment{
ObjectMeta: metav1.ObjectMeta{Name: "test-single"},
Spec: v1alpha1.EnvironmentSpec{
Provider: v1alpha1.ProviderAWS,
Instance: v1alpha1.Instance{
Region: "us-east-1",
},
},
}

log := logger.NewLogger()
provider := &Provider{
cacheFile: cacheFile,
Environment: &env,
log: log,
}

// Only set the original single-node fields
original := &AWS{
Vpcid: "vpc-single",
Subnetid: "subnet-single",
InternetGwid: "igw-single",
InternetGatewayAttachment: "vpc-single",
RouteTable: "rtb-single",
SecurityGroupid: "sg-single",
Instanceid: "i-single",
PublicDnsName: "ec2-single.compute.amazonaws.com",
}

if err := provider.updateStatus(env, original, nil); err != nil {
t.Fatalf("updateStatus failed: %v", err)
}

restored, err := provider.unmarsalCache()
if err != nil {
t.Fatalf("unmarsalCache failed: %v", err)
}

// New fields should be empty strings
if restored.PublicSubnetid != "" {
t.Errorf("PublicSubnetid should be empty, got %q", restored.PublicSubnetid)
}
if restored.NatGatewayid != "" {
t.Errorf("NatGatewayid should be empty, got %q", restored.NatGatewayid)
}

// Original fields must survive
if restored.Vpcid != original.Vpcid {
t.Errorf("Vpcid: got %q, want %q", restored.Vpcid, original.Vpcid)
}
if restored.SecurityGroupid != original.SecurityGroupid {
t.Errorf("SecurityGroupid: got %q, want %q", restored.SecurityGroupid, original.SecurityGroupid)
}
}
42 changes: 42 additions & 0 deletions pkg/provider/aws/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@ func (p *Provider) updateStatus(env v1alpha1.Environment, cache *AWS, condition
{Name: SecurityGroupID, Value: cache.SecurityGroupid},
{Name: InstanceID, Value: cache.Instanceid},
{Name: PublicDnsName, Value: cache.PublicDnsName},
{Name: PublicSubnetID, Value: cache.PublicSubnetid},
{Name: NatGatewayID, Value: cache.NatGatewayid},
{Name: PublicRouteTable, Value: cache.PublicRouteTable},
{Name: CPSecurityGroupID, Value: cache.CPSecurityGroupid},
{Name: WorkerSecurityGroupID, Value: cache.WorkerSecurityGroupid},
{Name: EIPAllocationID, Value: cache.EIPAllocationid},
{Name: IAMInstanceProfileArn, Value: cache.IAMInstanceProfileArn},
}
modified = true
} else {
Expand Down Expand Up @@ -138,6 +145,41 @@ func (p *Provider) updateStatus(env v1alpha1.Environment, cache *AWS, condition
properties.Value = cache.PublicDnsName
modified = true
}
case PublicSubnetID:
if properties.Value != cache.PublicSubnetid {
properties.Value = cache.PublicSubnetid
modified = true
}
Comment on lines +148 to +152
Copy link

Copilot AI Mar 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The for _, p := range envCopy.Status.Properties loop ranges by value, so assignments like p.Value = ... only update a copy and do not mutate envCopy.Status.Properties. This means new fields (e.g., PublicSubnetID, NatGatewayID, etc.) will not actually be persisted when properties already exist, even though modified becomes true. Iterate by index (e.g., for i := range ...) or take pointers so the slice elements are updated before calling update().

Copilot uses AI. Check for mistakes.
case NatGatewayID:
if properties.Value != cache.NatGatewayid {
properties.Value = cache.NatGatewayid
modified = true
}
case PublicRouteTable:
if properties.Value != cache.PublicRouteTable {
properties.Value = cache.PublicRouteTable
modified = true
}
case CPSecurityGroupID:
if properties.Value != cache.CPSecurityGroupid {
properties.Value = cache.CPSecurityGroupid
modified = true
}
case WorkerSecurityGroupID:
if properties.Value != cache.WorkerSecurityGroupid {
properties.Value = cache.WorkerSecurityGroupid
modified = true
}
case EIPAllocationID:
if properties.Value != cache.EIPAllocationid {
properties.Value = cache.EIPAllocationid
modified = true
}
case IAMInstanceProfileArn:
if properties.Value != cache.IAMInstanceProfileArn {
properties.Value = cache.IAMInstanceProfileArn
modified = true
}
default:
// Ignore non AWS infra properties
continue
Expand Down
Loading