diff --git a/internal/api/exchange_lookup.go b/internal/api/exchange_lookup.go new file mode 100644 index 00000000..818174ed --- /dev/null +++ b/internal/api/exchange_lookup.go @@ -0,0 +1,193 @@ +// Package api provides the HTTP API handlers for the CUDly dashboard. +package api + +import ( + "context" + "fmt" + "strings" + + "github.com/LeanerCloud/CUDly/internal/config" + "github.com/LeanerCloud/CUDly/pkg/exchange" +) + +// recsLister is the narrow slice of config.StoreInterface that the +// reshape lookup needs. Scoped here so the closure stays unit-testable +// against a tiny fake instead of the full StoreInterface mock. +type recsLister interface { + ListStoredRecommendations(ctx context.Context, filter config.RecommendationFilter) ([]config.RecommendationRecord, error) +} + +// purchaseRecLookupFromStore builds an exchange.PurchaseRecLookup that +// reads the cached AWS Cost Explorer purchase recommendations out of +// Postgres for a given (region, currency) pair. Each +// RecommendationRecord is mapped to an OfferingOption with: +// +// - InstanceType = rec.ResourceType (e.g. "m6i.large") +// - OfferingID = rec.ID (UUID; the UI uses this as a stable handle) +// - EffectiveMonthlyCost = (UpfrontCost / termMonths) + MonthlyCost +// - NormalizationFactor = exchange.NormalizationFactorForSize(size) +// - CurrencyCode = currencyCode (propagated; recs don't carry it) +// +// Where termMonths = rec.Term × 12 (rec.Term is in years, AWS standard +// for RIs / Savings Plans). Term ≤ 0 means we can't amortise upfront, +// so we fall back to MonthlyCost alone — the dollar-units check will +// then accept or reject based on monthly recurring vs. source. +// +// AccountID scoping (cross-account leak guard): when accountID is +// non-empty we restrict the query to that single CloudAccount UUID so +// reshape can't surface another tenant's recs. Empty accountID means +// "no filter" — used for ambient-credentials deployments where the +// caller couldn't (or chose not to) resolve the source account. +func purchaseRecLookupFromStore(store recsLister, accountID string) exchange.PurchaseRecLookup { + return func(ctx context.Context, region, currencyCode string) ([]exchange.OfferingOption, error) { + filter := config.RecommendationFilter{ + Provider: "aws", + Service: "ec2", + Region: region, + } + if accountID != "" { + filter.AccountIDs = []string{accountID} + } + recs, err := store.ListStoredRecommendations(ctx, filter) + if err != nil { + return nil, err + } + out := make([]exchange.OfferingOption, 0, len(recs)) + for _, rec := range recs { + out = append(out, recommendationToOffering(rec, currencyCode)) + } + return out, nil + } +} + +// recommendationToOffering maps a single cached Cost Explorer purchase +// recommendation to the OfferingOption shape the reshape layer +// consumes. Pulled out so the mapping can be unit-tested in isolation +// (no DB / no ctx required). +// +// TermSeconds is derived from rec.Term (years) using the canonical +// AWS RI duration constant 31_536_000s/year — the same value the AWS +// SDK reports on ec2.ReservedInstance.Duration so the term-match guard +// in pkg/exchange.fillAlternativesFromRecs can compare apples-to-apples +// against RIInfo.TermSeconds populated from ec2.ConvertibleRI.Duration. +func recommendationToOffering(rec config.RecommendationRecord, currencyCode string) exchange.OfferingOption { + // AWS Cost Explorer returns MonthlyCost and UpfrontCost as totals for + // the recommended Count of instances, not per-instance. The reshape + // layer compares OfferingOption.EffectiveMonthlyCost against + // RIInfo.MonthlyCost which is per-instance, so we normalize here. + // rec.Count == 0 is treated as "unknown count, fall back to total" + // rather than dividing by zero — practically rare since the scheduler + // only stores recs with Count >= 1. + count := float64(1) + if rec.Count > 0 { + count = float64(rec.Count) + } + monthly := rec.MonthlyCost / count + if rec.Term > 0 { + // rec.Term is in years; canonical AWS RI/SP amortisation uses + // 12 months per year regardless of leap years. + termMonths := float64(rec.Term * 12) + if termMonths > 0 { + monthly += rec.UpfrontCost / (count * termMonths) + } + } + _, size := splitInstanceType(rec.ResourceType) + var termSeconds int64 + if rec.Term > 0 { + termSeconds = int64(rec.Term) * secondsPerYear + } + return exchange.OfferingOption{ + InstanceType: rec.ResourceType, + OfferingID: rec.ID, + EffectiveMonthlyCost: monthly, + NormalizationFactor: exchange.NormalizationFactorForSize(size), + CurrencyCode: currencyCode, + TermSeconds: termSeconds, + } +} + +// secondsPerYear is the AWS-canonical RI duration constant for a 1-year +// term (365 × 86400). Matches the value the AWS SDK reports on +// ec2.ReservedInstance.Duration and the value +// ec2.ConvertibleRI.Duration carries — used so OfferingOption.TermSeconds +// can be compared directly against RIInfo.TermSeconds. +const secondsPerYear int64 = 365 * 24 * 60 * 60 + +// splitInstanceType splits "m5.xlarge" into ("m5", "xlarge"). Returns +// empty strings if the format is unrecognized. Mirrors the helper in +// pkg/exchange/reshape.go but kept local to avoid exporting a +// general-purpose parser the caller doesn't need. +func splitInstanceType(instanceType string) (family, size string) { + parts := strings.SplitN(instanceType, ".", 2) + if len(parts) != 2 { + return "", "" + } + return parts[0], parts[1] +} + +// resolveAWSCloudAccountID maps the running AWS account ID (raw, from +// STS) to the registered CloudAccount UUID so the reshape lookup can +// scope its query against the correct row in the recommendations +// table. Returns ("", nil) ONLY for the truly-no-scope cases: +// +// - AWS SDK config could not load (deployment is running on an +// Azure / GCP host with no AWS context at all — resolveAWSAccountID +// returns ("", nil) for this case); +// - ListCloudAccounts returned ZERO rows (no CloudAccounts registered +// at all — the bootstrap-before-first-account path; the recs table +// is also empty so an unscoped read is harmless). +// +// purchaseRecLookupFromStore treats ("", nil) as "skip the AccountIDs +// filter" so a deployment with ambient credentials and no registered +// CloudAccounts still sees alternatives, not a permanently empty list. +// Once the operator registers the account the filter engages. +// +// FAIL CLOSED on every real failure: +// - resolveAWSAccountID returns a non-nil error (STS GetCallerIdentity +// denied, transient AWS API failure, token expiry) — propagated so +// the caller aborts the lookup rather than silently falling through +// to an unscoped query that could leak another tenant's recs. +// - ListCloudAccounts returns an error (DB outage, permissions) — +// same treatment. +// - The running AWS account is resolved but DOES NOT match any +// registered CloudAccount AND there ARE registered accounts: +// return an error. Returning ("", nil) here would have +// purchaseRecLookupFromStore omit the AccountIDs filter and serve +// up another tenant's recs — exactly the multi-tenant leak the rest +// of this code path is designed to prevent. +func (h *Handler) resolveAWSCloudAccountID(ctx context.Context) (string, error) { + awsAccountID, err := h.resolveAWSAccountID(ctx) + if err != nil { + // STS reachable-but-failed: must NOT fall through to an + // unscoped read. A transient STS error in a multi-tenant + // deployment would otherwise surface another tenant's recs. + return "", fmt.Errorf("resolve source aws account for reshape scope: %w", err) + } + if awsAccountID == "" { + // Genuine "no AWS context" (Azure/GCP host). + return "", nil + } + provider := "aws" + accounts, err := h.config.ListCloudAccounts(ctx, config.CloudAccountFilter{Provider: &provider}) + if err != nil { + return "", fmt.Errorf("list cloud accounts for reshape scope: %w", err) + } + if len(accounts) == 0 { + // Bootstrap: no CloudAccounts registered at all. The recs + // table is necessarily empty too, so an unscoped read is a + // no-op rather than a leak. + return "", nil + } + for _, a := range accounts { + if a.ExternalID == awsAccountID { + return a.ID, nil + } + } + // Resolved-but-unregistered: AWS host has accounts, but the running + // account isn't one of them. Could be a misconfigured deployment, a + // fresh first-run before the operator added their own account, or + // (worst case) a host running in a different account than any + // registered tenant. Either way, returning "" here would skip the + // AccountIDs filter and leak other tenants' recs — fail closed. + return "", fmt.Errorf("running aws account %s is not registered in cloud_accounts; reshape scope cannot be resolved safely", awsAccountID) +} diff --git a/internal/api/exchange_lookup_test.go b/internal/api/exchange_lookup_test.go new file mode 100644 index 00000000..5eedcb21 --- /dev/null +++ b/internal/api/exchange_lookup_test.go @@ -0,0 +1,205 @@ +package api + +import ( + "context" + "errors" + "testing" + + "github.com/LeanerCloud/CUDly/internal/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// fakeRecsLister captures the filter passed to ListStoredRecommendations +// so tests can assert region / account / provider scoping landed in the +// SQL query. Returns a configurable result set or error. +type fakeRecsLister struct { + gotFilter config.RecommendationFilter + calls int + out []config.RecommendationRecord + err error +} + +func (f *fakeRecsLister) ListStoredRecommendations(_ context.Context, filter config.RecommendationFilter) ([]config.RecommendationRecord, error) { + f.calls++ + f.gotFilter = filter + return f.out, f.err +} + +// TestPurchaseRecLookupFromStore_RegionFilter pins that the closure +// pushes the requested region into the SQL filter so Postgres prunes +// rows by region rather than the Go layer doing it after the fact. +func TestPurchaseRecLookupFromStore_RegionFilter(t *testing.T) { + t.Parallel() + store := &fakeRecsLister{} + lookup := purchaseRecLookupFromStore(store, "") + _, err := lookup(context.Background(), "eu-west-1", "USD") + require.NoError(t, err) + assert.Equal(t, 1, store.calls, "lookup must invoke the store exactly once") + assert.Equal(t, "aws", store.gotFilter.Provider, "must scope to AWS recs") + assert.Equal(t, "ec2", store.gotFilter.Service, "must scope to EC2 recs (no RDS / opensearch leakage)") + assert.Equal(t, "eu-west-1", store.gotFilter.Region, "region must thread through to SQL") +} + +// TestPurchaseRecLookupFromStore_AccountFilter pins the cross-account +// leak guard: when an account UUID is supplied, the filter restricts +// the query to that single account so the reshape page can't surface +// another tenant's recommendations. +func TestPurchaseRecLookupFromStore_AccountFilter(t *testing.T) { + t.Parallel() + store := &fakeRecsLister{} + lookup := purchaseRecLookupFromStore(store, "acct-uuid-123") + _, err := lookup(context.Background(), "us-east-1", "USD") + require.NoError(t, err) + require.Len(t, store.gotFilter.AccountIDs, 1, "non-empty account UUID must populate AccountIDs filter") + assert.Equal(t, "acct-uuid-123", store.gotFilter.AccountIDs[0]) +} + +// TestPurchaseRecLookupFromStore_NoAccountFilterWhenEmpty pins the +// degraded-mode contract: when the caller can't resolve an account +// UUID (ambient credentials, account not registered yet), the lookup +// returns whatever recs exist in the region rather than blanking the +// page. The operator can register the account later to engage scoping. +func TestPurchaseRecLookupFromStore_NoAccountFilterWhenEmpty(t *testing.T) { + t.Parallel() + store := &fakeRecsLister{} + lookup := purchaseRecLookupFromStore(store, "") + _, err := lookup(context.Background(), "us-east-1", "USD") + require.NoError(t, err) + assert.Empty(t, store.gotFilter.AccountIDs, "empty account UUID must NOT add an AccountIDs filter") +} + +// TestPurchaseRecLookupFromStore_NoRecsReturnsEmpty pins the +// cold-cache contract: zero recs in the region → empty slice (not +// nil-error). The downstream AnalyzeReshapingWithRecs treats an empty +// slice the same as "no alternatives, primary target intact". +func TestPurchaseRecLookupFromStore_NoRecsReturnsEmpty(t *testing.T) { + t.Parallel() + store := &fakeRecsLister{out: nil} + lookup := purchaseRecLookupFromStore(store, "") + got, err := lookup(context.Background(), "us-east-1", "USD") + require.NoError(t, err) + assert.Empty(t, got, "empty recs → empty offerings, no error") +} + +// TestPurchaseRecLookupFromStore_StoreErrorPropagates pins the error +// path: an underlying SQL failure surfaces back to the caller. +// AnalyzeReshapingWithRecs handles the error by falling back to base +// recs (graceful degradation), so the closure just needs to forward +// the error verbatim. +func TestPurchaseRecLookupFromStore_StoreErrorPropagates(t *testing.T) { + t.Parallel() + store := &fakeRecsLister{err: errors.New("postgres timeout")} + lookup := purchaseRecLookupFromStore(store, "") + _, err := lookup(context.Background(), "us-east-1", "USD") + require.Error(t, err, "store errors must propagate so the caller can fall back") +} + +// TestPurchaseRecLookupFromStore_MapsFields pins the +// RecommendationRecord → OfferingOption mapping shape so the dollar- +// units pre-filter and the UI both see consistent data: +// - InstanceType comes from ResourceType. +// - OfferingID = rec.ID (stable handle). +// - EffectiveMonthlyCost = UpfrontCost / (Term * 12) + MonthlyCost. +// - NormalizationFactor resolved from the size (here "large" → 4). +// - CurrencyCode propagated from the lookup's currencyCode arg. +func TestPurchaseRecLookupFromStore_MapsFields(t *testing.T) { + t.Parallel() + store := &fakeRecsLister{ + out: []config.RecommendationRecord{ + { + ID: "rec-1", + Provider: "aws", + Service: "ec2", + Region: "us-east-1", + ResourceType: "m6i.large", + Term: 1, // 1 year term + UpfrontCost: 120, // 120 / 12 = 10/mo amortised + MonthlyCost: 20, // + 20/mo recurring = 30 + }, + { + // Term=0 → no upfront amortisation; effective = MonthlyCost only. + ID: "rec-2", + Provider: "aws", + Service: "ec2", + Region: "us-east-1", + ResourceType: "c5.xlarge", + Term: 0, + UpfrontCost: 500, // ignored when Term == 0 + MonthlyCost: 50, + }, + }, + } + lookup := purchaseRecLookupFromStore(store, "") + got, err := lookup(context.Background(), "us-east-1", "EUR") + require.NoError(t, err) + require.Len(t, got, 2) + + assert.Equal(t, "m6i.large", got[0].InstanceType) + assert.Equal(t, "rec-1", got[0].OfferingID) + assert.InDelta(t, 30.0, got[0].EffectiveMonthlyCost, 0.001, + "upfront 120 over 12 months = 10/mo + recurring 20/mo = 30/mo") + assert.InDelta(t, 4.0, got[0].NormalizationFactor, 0.001, "large → NF 4") + assert.Equal(t, "EUR", got[0].CurrencyCode, "currency must be propagated from caller") + + assert.Equal(t, "c5.xlarge", got[1].InstanceType) + assert.InDelta(t, 50.0, got[1].EffectiveMonthlyCost, 0.001, + "Term==0 means upfront cannot be amortised; fall back to MonthlyCost") + assert.InDelta(t, 8.0, got[1].NormalizationFactor, 0.001, "xlarge → NF 8") + + // Term plumbing: 1y rec → 31_536_000 seconds (AWS canonical RI + // duration); Term==0 → TermSeconds==0 (the reshape term-match guard + // then falls back to "skip the gate" rather than blocking the rec). + assert.Equal(t, int64(365*24*60*60), got[0].TermSeconds, + "1-year rec must serialise to 31_536_000s for the term-match guard") + assert.Equal(t, int64(0), got[1].TermSeconds, + "Term==0 rec must not synthesise a fake duration — TermSeconds stays zero") +} + +// TestPurchaseRecLookupFromStore_ThreeYearTerm pins the multi-year +// path: rec.Term=3 must serialise to exactly 3 × 31_536_000s so the +// reshape term-match guard treats it as 3y rather than rounding it +// onto a 1y surface. +func TestPurchaseRecLookupFromStore_ThreeYearTerm(t *testing.T) { + t.Parallel() + store := &fakeRecsLister{ + out: []config.RecommendationRecord{ + { + ID: "rec-3y", Provider: "aws", Service: "ec2", Region: "us-east-1", + ResourceType: "m5.large", Term: 3, MonthlyCost: 10, + }, + }, + } + lookup := purchaseRecLookupFromStore(store, "") + got, err := lookup(context.Background(), "us-east-1", "USD") + require.NoError(t, err) + require.Len(t, got, 1) + assert.Equal(t, int64(3*365*24*60*60), got[0].TermSeconds, + "3-year rec must serialise to 3 × 31_536_000s for the term-match guard") +} + +// TestSplitInstanceType pins the local instance-type parser used by +// the mapping helper. Mirrors the pkg/exchange parser to avoid +// exporting a general-purpose helper this package doesn't need. +func TestSplitInstanceType(t *testing.T) { + t.Parallel() + cases := []struct { + in string + wantFamily string + wantSize string + }{ + {"m5.large", "m5", "large"}, + {"m7g.metal", "m7g", "metal"}, + {"r6i.16xlarge", "r6i", "16xlarge"}, + {"", "", ""}, + {"malformed", "", ""}, + } + for _, c := range cases { + t.Run(c.in, func(t *testing.T) { + t.Parallel() + f, s := splitInstanceType(c.in) + assert.Equal(t, c.wantFamily, f) + assert.Equal(t, c.wantSize, s) + }) + } +} diff --git a/internal/api/handler.go b/internal/api/handler.go index 276fa85e..83980e97 100644 --- a/internal/api/handler.go +++ b/internal/api/handler.go @@ -70,6 +70,14 @@ type Handler struct { reshapeEC2Factory func(aws.Config) reshapeEC2Client reshapeRecsFactory func(aws.Config) reshapeRecsClient + // Optional account-resolver injection point used by the reshape + // handler integration test. When nil (the production default), the + // handler calls h.resolveAWSCloudAccountID which in turn invokes + // sts.GetCallerIdentity — fine in Lambda but fails on dev machines + // without AWS credentials. Tests set this to a fixed-result fake so + // the integration suite runs hermetically. + reshapeAccountResolver func(context.Context) (string, error) + // commitmentOpts discovers which AWS (term, payment) combinations // each service actually sells and validates saves against that data. // Nil is valid: the endpoint returns unavailable and save-side @@ -393,20 +401,26 @@ func (h *Handler) buildResponse(statusCode int, headers map[string]string, body } // loadAPIKey retrieves the API key from Secrets Manager. -// It caches the base AWS config via awsCfgOnce to avoid redundant config loads. +// +// Uses a local AWS config load — does NOT share the request-path +// awsCfgOnce/awsCfgErr cache. Reason: NewHandler runs loadAPIKey at +// cold-start with a 5-second deadline. If that deadline fires, the +// shared sync.Once would seal awsCfgErr = "context deadline exceeded" +// for the entire Lambda lifetime, permanently breaking the request-path +// resolveAWSCallerIdentity call (and the multi-tenant scope filter that +// depends on it). Loading config locally keeps the cold-start timeout +// scoped to this one call. func (h *Handler) loadAPIKey(ctx context.Context) (string, error) { if h.secretsARN == "" { return "", nil } - h.awsCfgOnce.Do(func() { - h.awsCfg, h.awsCfgErr = awsconfig.LoadDefaultConfig(ctx) - }) - if h.awsCfgErr != nil { - return "", h.awsCfgErr + cfg, err := awsconfig.LoadDefaultConfig(ctx) + if err != nil { + return "", err } - client := secretsmanager.NewFromConfig(h.awsCfg) + client := secretsmanager.NewFromConfig(cfg) result, err := client.GetSecretValue(ctx, &secretsmanager.GetSecretValueInput{ SecretId: &h.secretsARN, }) @@ -458,7 +472,15 @@ func (h *Handler) resolveSourceIdentity(ctx context.Context) *sourceIdentity { id := &sourceIdentity{Provider: cloud} switch cloud { case "aws": - id.AccountID, id.Partition = h.resolveAWSCallerIdentity(ctx) + // resolveSourceIdentity is best-effort and is consumed by + // populateSourceAccountID, which fails loud on an empty + // AccountID for the AWS-source case. STS errors are already + // logged WARN inside resolveAWSCallerIdentity, so we drop + // the error here explicitly — the consumer's empty-string + // check is the security gate for federation rendering. + // (Reshape uses resolveAWSAccountID directly which DOES + // propagate the error for fail-closed multi-tenant safety.) + id.AccountID, id.Partition, _ = h.resolveAWSCallerIdentity(ctx) case "azure": id.ClientID = os.Getenv("AZURE_CLIENT_ID") id.SubscriptionID = os.Getenv("AZURE_SUBSCRIPTION_ID") @@ -478,38 +500,75 @@ func (h *Handler) resolveSourceAccountID(ctx context.Context) string { } // resolveAWSAccountID returns the AWS account ID where CUDly runs by calling -// STS GetCallerIdentity. Returns "" on AWS SDK config load failure or STS -// error (logs WARN in either case). +// STS GetCallerIdentity. Convenience wrapper around resolveAWSCallerIdentity +// for callers that only need the account ID (and the security-paths-aware +// error propagation). +// +// Return shape distinguishes three cases: // -// WARNING: empty string is the silent-failure path. Callers in user-facing -// flows MUST check the result and fail loud rather than rendering an empty -// account ID into a bundle — a bundle with an empty source_account_id -// produces a broken trust policy that silently fails at apply time. -func (h *Handler) resolveAWSAccountID(ctx context.Context) string { - id, _ := h.resolveAWSCallerIdentity(ctx) - return id +// - ("", nil) — AWS SDK config could not load (deployment runs +// without AWS context: e.g. Azure / GCP host). +// This is the legitimate "no AWS account configured" +// signal; callers may treat it as such. +// - ("", err) — AWS SDK config loaded but STS GetCallerIdentity +// failed (transient API error, missing IAM permission, +// token expiry). Callers in security-sensitive paths +// (e.g. multi-tenant scope filters) MUST fail closed +// on this — treating it like the legitimate empty +// case can leak data across tenants. +// - (accountID, nil) — success. +// +// WARNING: callers in user-facing flows must check the result and fail loud +// rather than rendering an empty account ID into a bundle — a bundle with +// an empty source_account_id produces a broken trust policy that silently +// fails at apply time. +func (h *Handler) resolveAWSAccountID(ctx context.Context) (string, error) { + id, _, err := h.resolveAWSCallerIdentity(ctx) + return id, err } // resolveAWSCallerIdentity returns (accountID, partition) parsed from STS // GetCallerIdentity. The partition is taken from the returned ARN's second // segment (e.g., `arn:aws-us-gov:iam::...` → "aws-us-gov") and is used by // the trust-policy snippet renderer to emit the correct ARN prefix in -// AWS China and GovCloud deployments (issue #130c). Both fields are best- -// effort: a STS failure returns ("", "") and a malformed ARN returns -// (accountID, "") — the frontend defaults to the `aws` partition when -// the value is empty. -func (h *Handler) resolveAWSCallerIdentity(ctx context.Context) (string, string) { +// AWS China and GovCloud deployments (issue #130c). +// +// Return shape: +// +// - ("", "", nil) — host is non-AWS (sourceCloud() != "aws") +// AND AWS SDK config could not load. This is the legitimate +// "no AWS context" path for Azure/GCP-hosted deployments. +// - ("", "", err) — host is AWS but the SDK config could +// not load, OR config loaded but STS GetCallerIdentity failed. +// Both are real failures; security-sensitive callers MUST fail +// closed on this. +// - (accountID, partition, nil) — success. partition may still be "" if the +// ARN was malformed (frontend defaults to "aws"). +// +// Non-AWS hosts (Azure/GCP) short-circuit before any AWS SDK work — +// no point burning a LoadDefaultConfig attempt + STS client on every +// reshape request from a host that genuinely has no AWS context. AWS +// hosts with a broken SDK config surface the load error so the +// multi-tenant scope filter in resolveAWSCloudAccountID fails closed +// instead of degrading into an unscoped read. +func (h *Handler) resolveAWSCallerIdentity(ctx context.Context) (string, string, error) { + if sourceCloud() != "aws" { + // Azure/GCP host: short-circuit before any AWS SDK work. + return "", "", nil + } h.awsCfgOnce.Do(func() { h.awsCfg, h.awsCfgErr = awsconfig.LoadDefaultConfig(ctx) }) if h.awsCfgErr != nil { - return "", "" + // AWS host but SDK config broken: real failure. Surface the + // error so security-sensitive callers fail closed. + return "", "", fmt.Errorf("aws sdk config load: %w", h.awsCfgErr) } client := sts.NewFromConfig(h.awsCfg) identity, err := client.GetCallerIdentity(ctx, &sts.GetCallerIdentityInput{}) if err != nil { logging.Warnf("Failed to resolve source account ID via STS: %v", err) - return "", "" + return "", "", fmt.Errorf("sts get-caller-identity: %w", err) } var accountID, partition string if identity.Account != nil { @@ -518,7 +577,7 @@ func (h *Handler) resolveAWSCallerIdentity(ctx context.Context) (string, string) if identity.Arn != nil { partition = parseArnPartition(*identity.Arn) } - return accountID, partition + return accountID, partition, nil } // parseArnPartition extracts the partition segment from an AWS ARN. diff --git a/internal/api/handler_ri_exchange.go b/internal/api/handler_ri_exchange.go index 65104bf4..ecf2c4f6 100644 --- a/internal/api/handler_ri_exchange.go +++ b/internal/api/handler_ri_exchange.go @@ -29,9 +29,13 @@ import ( // awsprovider.NewEC2ClientDirect already implements these methods // (Go structural typing), so the nil-factory fallback path casts it // directly. +// +// Cross-family alternatives no longer flow through here — they're +// sourced from the cached AWS Cost Explorer purchase recommendations +// in Postgres via purchaseRecLookupFromStore (see exchange_lookup.go), +// so the EC2 client only needs to enumerate convertible RIs. type reshapeEC2Client interface { ListConvertibleReservedInstances(ctx context.Context) ([]ec2svc.ConvertibleRI, error) - FindConvertibleOfferings(ctx context.Context, instanceTypes []string) ([]exchange.OfferingOption, error) } // reshapeRecsClient is the narrow slice of the recommendations @@ -189,6 +193,11 @@ func convertToExchangeTypes(instances []ec2svc.ConvertibleRI, utilData []recomme NormalizationFactor: inst.NormalizationFactor, MonthlyCost: monthlyCostFromConvertibleRI(inst), CurrencyCode: inst.CurrencyCode, + // Plumb the AWS-reported RI duration straight through — + // reshape's term-match guard rejects alternatives whose + // TermSeconds differs from the source so a 3y RI never + // surfaces as an alternative to a 1y commitment. + TermSeconds: inst.Duration, } } @@ -225,6 +234,15 @@ func (h *Handler) getReshapeRecommendations(ctx context.Context, req *events.Lam if err != nil { return nil, fmt.Errorf("failed to load AWS config: %w", err) } + // Normalize region: when the caller omits ?region=, loadAWSConfigWithRegion + // resolves a default from the AWS SDK chain but the local string stays + // empty — which would scope the RI utilization cache and the recs lookup + // unscoped, leaking alternatives from other regions onto the reshape page. + // Adopt the resolved region so every downstream consumer sees the same + // value the AWS clients are actually talking to. + if region == "" { + region = cfg.Region + } ec2Client := h.buildReshapeEC2Client(cfg) instances, err := ec2Client.ListConvertibleReservedInstances(ctx) @@ -239,19 +257,48 @@ func (h *Handler) getReshapeRecommendations(ctx context.Context, req *events.Lam } riInfos, utilInfos := convertToExchangeTypes(instances, utilData) - // Offering lookup closure uses the already-configured ec2Client, - // so no extra AWS config plumbing is needed. Errors from the - // lookup are swallowed inside AnalyzeReshapingWithOfferings — - // losing pricing chips is strictly less bad than losing the whole - // reshape page. - lookup := func(ctx context.Context, instanceTypes []string) ([]exchange.OfferingOption, error) { - return ec2Client.FindConvertibleOfferings(ctx, instanceTypes) + // Cross-family alternatives are sourced from the cached AWS Cost + // Explorer purchase recommendations table in Postgres — no per-rec + // DescribeReservedInstancesOfferings fan-out, no hand-curated + // peer-family allowlist. The lookup is scoped to the running AWS + // account (when registered) so a multi-tenant deployment can't + // surface another tenant's recs. Empty resolved account ID means + // "no scope filter" for ambient-credentials deployments where + // CloudAccount registration hasn't happened yet; a real ListCloudAccounts + // error aborts the request instead of silently falling through to an + // unscoped query that could match the wrong tenant's recs. + resolveAccount := h.resolveAWSCloudAccountID + if h.reshapeAccountResolver != nil { + // Test injection — bypasses sts.GetCallerIdentity so the + // integration suite runs without AWS credentials. + resolveAccount = h.reshapeAccountResolver + } + cloudAccountID, err := resolveAccount(ctx) + if err != nil { + return nil, fmt.Errorf("failed to resolve cloud account scope for reshape: %w", err) } - recs := exchange.AnalyzeReshapingWithOfferings(ctx, riInfos, utilInfos, threshold, lookup) + currencyCode := firstNonEmptyCurrency(instances) + lookup := purchaseRecLookupFromStore(h.config, cloudAccountID) + recs := exchange.AnalyzeReshapingWithRecs(ctx, riInfos, utilInfos, threshold, region, currencyCode, lookup) return &ReshapeRecommendationsResponse{Recommendations: recs}, nil } +// firstNonEmptyCurrency returns the CurrencyCode of the first RI that +// has one set, defaulting to "USD" for legacy fixtures and the common +// case. The reshape page operates on a single AWS account at a time so +// all RIs share the same currency in practice; picking the first +// populated value is sufficient and avoids a noisy mismatch panic when +// some entries are missing the field. +func firstNonEmptyCurrency(instances []ec2svc.ConvertibleRI) string { + for _, inst := range instances { + if inst.CurrencyCode != "" { + return inst.CurrencyCode + } + } + return "USD" +} + // getExchangeQuote gets a quote for an RI exchange. func (h *Handler) getExchangeQuote(ctx context.Context, req *events.LambdaFunctionURLRequest) (any, error) { if _, err := h.requirePermission(ctx, req, "view", "purchases"); err != nil { diff --git a/internal/api/handler_ri_exchange_integration_test.go b/internal/api/handler_ri_exchange_integration_test.go index c0f6ea15..bb6a09a5 100644 --- a/internal/api/handler_ri_exchange_integration_test.go +++ b/internal/api/handler_ri_exchange_integration_test.go @@ -6,9 +6,10 @@ package api import ( "context" "encoding/json" - "fmt" + "strings" "sync/atomic" "testing" + "time" "github.com/aws/aws-lambda-go/events" "github.com/aws/aws-sdk-go-v2/aws" @@ -16,34 +17,23 @@ import ( "github.com/LeanerCloud/CUDly/internal/config" "github.com/LeanerCloud/CUDly/internal/database/postgres/migrations" "github.com/LeanerCloud/CUDly/internal/database/postgres/testhelpers" - "github.com/LeanerCloud/CUDly/pkg/exchange" "github.com/LeanerCloud/CUDly/providers/aws/recommendations" ec2svc "github.com/LeanerCloud/CUDly/providers/aws/services/ec2" "github.com/stretchr/testify/require" ) // fakeReshapeEC2 is a stub implementation of reshapeEC2Client for the -// integration test. Returns fixed RIs + fixed offerings (or an error -// when errOnOfferings is set). Lets the test assert on what the -// reshape handler does with the AWS-returned shapes without hitting -// real AWS. +// integration test. Returns fixed convertible RIs without hitting real +// AWS. Cross-family alternatives now flow through the recommendations +// store (see purchaseRecLookupFromStore), so this fake no longer needs +// to mock FindConvertibleOfferings. type fakeReshapeEC2 struct { - instances []ec2svc.ConvertibleRI - offerings []exchange.OfferingOption - offeringsErr error - offeringsCalled atomic.Int32 + instances []ec2svc.ConvertibleRI } func (f *fakeReshapeEC2) ListConvertibleReservedInstances(_ context.Context) ([]ec2svc.ConvertibleRI, error) { return f.instances, nil } -func (f *fakeReshapeEC2) FindConvertibleOfferings(_ context.Context, _ []string) ([]exchange.OfferingOption, error) { - f.offeringsCalled.Add(1) - if f.offeringsErr != nil { - return nil, f.offeringsErr - } - return f.offerings, nil -} // fakeReshapeRecs is a stub for reshapeRecsClient. Counts calls so the // test can assert cache-hit behaviour on the second request. @@ -78,6 +68,12 @@ func buildReshapeHandler(store *config.PostgresStore, ec2Fake *fakeReshapeEC2, r apiKey: "test-api-key", reshapeEC2Factory: func(_ aws.Config) reshapeEC2Client { return ec2Fake }, reshapeRecsFactory: func(_ aws.Config) reshapeRecsClient { return recsFake }, + // Bypass STS GetCallerIdentity so the test runs without real + // AWS credentials. Empty cloud-account ID = no AccountIDs + // filter on the recs lookup, which is the legitimate + // "no-scope-filter" path; tests that assert scope filtering + // would set this to a UUID matching a seeded CloudAccount. + reshapeAccountResolver: func(_ context.Context) (string, error) { return "", nil }, } // Pre-populate the AWS config cache so loadAWSConfigWithRegion // returns immediately without LoadDefaultConfig. The Region field @@ -100,24 +96,44 @@ func reshapeRequest() *events.LambdaFunctionURLRequest { } } +// seedRecsForRegion writes a snapshot of cached Cost Explorer purchase +// recommendations directly into the store so the reshape lookup has +// something to read. Bypasses the scheduler so the test only exercises +// the read-side mapping logic. +func seedRecsForRegion(ctx context.Context, t *testing.T, store *config.PostgresStore, region string, recs []config.RecommendationRecord) { + t.Helper() + require.NoError(t, store.ReplaceRecommendations(ctx, time.Now(), recs), + "seeding recommendations into the test container failed") +} + // TestReshapeRecommendations_Integration_EndToEnd exercises the full -// path: auth → cache cold-fetch → AnalyzeReshapingWithOfferings -// pricing enrichment → JSON response. Asserts the response carries -// alternative_targets sorted ascending by effective_monthly_cost. +// path: auth → cache cold-fetch → AnalyzeReshapingWithRecs (recs-driven +// alternatives) → JSON response. Asserts the response carries +// alternative_targets sorted ascending by effective_monthly_cost, +// sourced from the recommendations table rather than per-rec AWS API +// calls. func TestReshapeRecommendations_Integration_EndToEnd(t *testing.T) { ctx := context.Background() store, cleanup := setupReshapeHandlerIntegration(ctx, t) defer cleanup() + // Seed cross-family recs in us-east-1. Same-family seed uses a + // different SIZE (m5.2xlarge) than the primary target (m5.large) + // so the test exercises the FAMILY-exclusion filter rather than + // trivial exact-match dedupe. Handler should surface c5 + r5 as + // cross-family alternatives ordered by EffectiveMonthlyCost; + // m5.2xlarge must NOT appear despite being a different size — + // same-family RIs aren't valid Convertible exchange targets. + seedRecsForRegion(ctx, t, store, "us-east-1", []config.RecommendationRecord{ + {Provider: "aws", Service: "ec2", Region: "us-east-1", ResourceType: "m5.2xlarge", Term: 1, MonthlyCost: 40}, + {Provider: "aws", Service: "ec2", Region: "us-east-1", ResourceType: "c5.large", Term: 1, MonthlyCost: 50}, + {Provider: "aws", Service: "ec2", Region: "us-east-1", ResourceType: "r5.large", Term: 1, MonthlyCost: 60}, + }) + ec2Fake := &fakeReshapeEC2{ instances: []ec2svc.ConvertibleRI{ {ReservedInstanceID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1}, }, - offerings: []exchange.OfferingOption{ - {InstanceType: "m5.large", OfferingID: "off-m5", EffectiveMonthlyCost: 40.0}, - {InstanceType: "m6i.large", OfferingID: "off-m6i", EffectiveMonthlyCost: 35.0}, - {InstanceType: "m7g.large", OfferingID: "off-m7g", EffectiveMonthlyCost: 30.0}, - }, } recsFake := &fakeReshapeRecs{ utilization: []recommendations.RIUtilization{ @@ -135,17 +151,19 @@ func TestReshapeRecommendations_Integration_EndToEnd(t *testing.T) { require.Equal(t, "ri-1", rec.SourceRIID) require.Equal(t, "m5.large", rec.TargetInstanceType) - // AnalyzeReshapingWithOfferings drops the source family (m5) from - // the alternatives, so we expect m6i.large + m7g.large only. - // Alternatives are sorted ascending by EffectiveMonthlyCost — - // cheapest first — so m7g ($30) comes before m6i ($35). - require.Len(t, rec.AlternativeTargets, 2) - require.Equal(t, "m7g.large", rec.AlternativeTargets[0].InstanceType) - require.Equal(t, "off-m7g", rec.AlternativeTargets[0].OfferingID) - require.InDelta(t, 30.0, rec.AlternativeTargets[0].EffectiveMonthlyCost, 0.001) - require.Equal(t, "m6i.large", rec.AlternativeTargets[1].InstanceType) - require.Equal(t, "off-m6i", rec.AlternativeTargets[1].OfferingID) - require.InDelta(t, 35.0, rec.AlternativeTargets[1].EffectiveMonthlyCost, 0.001) + // Same-family m5 stripped; c5 + r5 surface ascending by cost. + require.Len(t, rec.AlternativeTargets, 2, + "cross-family alternatives must come from the cached recs (same-family m5.2xlarge excluded)") + require.Equal(t, "c5.large", rec.AlternativeTargets[0].InstanceType) + require.InDelta(t, 50.0, rec.AlternativeTargets[0].EffectiveMonthlyCost, 0.001) + require.Equal(t, "r5.large", rec.AlternativeTargets[1].InstanceType) + require.InDelta(t, 60.0, rec.AlternativeTargets[1].EffectiveMonthlyCost, 0.001) + // Independent regression guard: no m5.* of any size should appear + // in alternatives — pins the family-exclusion contract. + for _, alt := range rec.AlternativeTargets { + require.False(t, strings.HasPrefix(alt.InstanceType, "m5."), + "same-family m5.* alternatives must be excluded, got %s", alt.InstanceType) + } // Verify the RI utilization cache row landed in Postgres. entry, err := store.GetRIUtilizationCache(ctx, "us-east-1", 30) @@ -156,26 +174,28 @@ func TestReshapeRecommendations_Integration_EndToEnd(t *testing.T) { require.Len(t, cached, 1) require.Equal(t, "ri-1", cached[0].ReservedInstanceID) - // Fetcher + offerings each called exactly once on the cold path. + // Utilization fetcher called exactly once on the cold path. require.Equal(t, int32(1), recsFake.calls.Load()) - require.Equal(t, int32(1), ec2Fake.offeringsCalled.Load()) } // TestReshapeRecommendations_Integration_SecondCallHitsCache verifies // the cache short-circuits the RI utilization fetcher on a second -// request within TTL. +// request within TTL. The recs lookup is a Postgres read on every +// request (no separate cache layer); only the Cost Explorer +// utilization fetcher is TTL-cached. func TestReshapeRecommendations_Integration_SecondCallHitsCache(t *testing.T) { ctx := context.Background() store, cleanup := setupReshapeHandlerIntegration(ctx, t) defer cleanup() + seedRecsForRegion(ctx, t, store, "us-east-1", []config.RecommendationRecord{ + {Provider: "aws", Service: "ec2", Region: "us-east-1", ResourceType: "c5.large", Term: 1, MonthlyCost: 40}, + }) + ec2Fake := &fakeReshapeEC2{ instances: []ec2svc.ConvertibleRI{ {ReservedInstanceID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1}, }, - offerings: []exchange.OfferingOption{ - {InstanceType: "m5.large", OfferingID: "off-m5", EffectiveMonthlyCost: 40.0}, - }, } recsFake := &fakeReshapeRecs{ utilization: []recommendations.RIUtilization{ @@ -194,26 +214,24 @@ func TestReshapeRecommendations_Integration_SecondCallHitsCache(t *testing.T) { require.NoError(t, err) require.Equal(t, int32(1), recsFake.calls.Load(), "fresh read within soft TTL must not re-invoke the utilization fetcher") - - // Offerings lookup is NOT cached (distinct concern) — expected to - // be called on each request. - require.Equal(t, int32(2), ec2Fake.offeringsCalled.Load()) } -// TestReshapeRecommendations_Integration_OfferingsLookupErrorKeepsNameOnlyAlternatives -// verifies the graceful-degradation contract: when FindConvertibleOfferings -// returns an error, the base recs still ship with name-only -// alternatives (no OfferingID, zero cost). -func TestReshapeRecommendations_Integration_OfferingsLookupErrorKeepsNameOnlyAlternatives(t *testing.T) { +// TestReshapeRecommendations_Integration_NoCachedRecsReturnsPrimaryOnly +// — when the recommendations table is empty for the region, the rec +// still ships with its primary same-family target but with no +// alternatives. UX matches "AWS hasn't recommended anything for this +// region yet" rather than blanking the page. +func TestReshapeRecommendations_Integration_NoCachedRecsReturnsPrimaryOnly(t *testing.T) { ctx := context.Background() store, cleanup := setupReshapeHandlerIntegration(ctx, t) defer cleanup() + // Note: NO seedRecsForRegion call — table starts empty. + ec2Fake := &fakeReshapeEC2{ instances: []ec2svc.ConvertibleRI{ {ReservedInstanceID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1}, }, - offeringsErr: fmt.Errorf("simulated cost-explorer 5xx"), } recsFake := &fakeReshapeRecs{ utilization: []recommendations.RIUtilization{ @@ -223,20 +241,129 @@ func TestReshapeRecommendations_Integration_OfferingsLookupErrorKeepsNameOnlyAlt h := buildReshapeHandler(store, ec2Fake, recsFake) resp, err := h.getReshapeRecommendations(ctx, reshapeRequest()) - require.NoError(t, err, "handler must not fail when the offerings lookup errors — graceful degradation") + require.NoError(t, err, "handler must not fail when the recommendations cache is empty") body := resp.(*ReshapeRecommendationsResponse) require.Len(t, body.Recommendations, 1) rec := body.Recommendations[0] require.Equal(t, "m5.large", rec.TargetInstanceType, "primary target stays intact") + require.Empty(t, rec.AlternativeTargets, + "empty cache → no alternatives, primary target still surfaced") +} + +// seedCloudAccount inserts a minimal CloudAccount row and returns its +// generated UUID. Used by the scoped-account integration test so the +// recommendations table can carry a cloud_account_id that satisfies the +// FK to cloud_accounts(id). All fields beyond Provider / ExternalID / +// Name are left at their zero value — the recs scope filter joins only +// on cloud_account_id, so credentials and auth-mode plumbing are +// irrelevant for this test. +func seedCloudAccount(ctx context.Context, t *testing.T, store *config.PostgresStore, externalID, name string) string { + t.Helper() + account := &config.CloudAccount{ + Name: name, + Provider: "aws", + ExternalID: externalID, + Enabled: true, + } + require.NoError(t, store.CreateCloudAccount(ctx, account), + "seeding CloudAccount %q failed", externalID) + require.NotEmpty(t, account.ID, "CreateCloudAccount must generate a UUID") + return account.ID +} + +// TestReshapeRecommendations_Integration_ScopedAccount_FiltersToAccount +// is the companion to the existing unscoped integration tests +// (CodeRabbit pass-5 finding 2). The other tests all wire +// reshapeAccountResolver to ("", nil), exercising only the unscoped +// "no AccountIDs filter" path. This test exercises the scoped branch: +// +// 1. Seed two CloudAccounts (A and B) so the cloud_account_id FK on +// the recommendations table is satisfied for both rows. +// 2. Seed two recs in the same region — A's is c5.large, B's is +// r5.large — each tagged with its account's UUID. +// 3. Override reshapeAccountResolver to return A's UUID, simulating +// the production path where the running AWS account maps to one +// registered CloudAccount via h.resolveAWSCloudAccountID. +// 4. Assert the response only surfaces c5.large in alternatives, not +// r5.large — proving the recs query was scoped by AccountIDs +// rather than served as an unscoped read across both tenants. +// +// This is the cross-tenant leak guard the rest of the reshape path +// relies on. Without this test, the scoped branch had zero coverage — +// a regression that silently dropped the AccountIDs filter (e.g. a +// future refactor reintroducing the resolved-but-unregistered fall- +// through bug) would have shipped undetected. +func TestReshapeRecommendations_Integration_ScopedAccount_FiltersToAccount(t *testing.T) { + ctx := context.Background() + store, cleanup := setupReshapeHandlerIntegration(ctx, t) + defer cleanup() + + // Two registered accounts, distinct AWS account numbers. The recs + // scope filter compares against the CloudAccount UUID (not the AWS + // external ID), so the test resolver just hands back the UUID. + accountAID := seedCloudAccount(ctx, t, store, "111111111111", "Tenant A") + accountBID := seedCloudAccount(ctx, t, store, "222222222222", "Tenant B") + + // Seed one cross-family rec per tenant in the same region. If the + // scope filter is honoured, only Tenant A's c5.large surfaces; + // otherwise both alternatives leak through (the regression we're + // guarding against). + require.NoError(t, store.ReplaceRecommendations(ctx, time.Now(), []config.RecommendationRecord{ + { + Provider: "aws", + Service: "ec2", + Region: "us-east-1", + ResourceType: "c5.large", + Term: 1, + MonthlyCost: 50, + CloudAccountID: &accountAID, + }, + { + Provider: "aws", + Service: "ec2", + Region: "us-east-1", + ResourceType: "r5.large", + Term: 1, + MonthlyCost: 60, + CloudAccountID: &accountBID, + }, + }), "seeding scoped recommendations failed") + + ec2Fake := &fakeReshapeEC2{ + instances: []ec2svc.ConvertibleRI{ + {ReservedInstanceID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1}, + }, + } + recsFake := &fakeReshapeRecs{ + utilization: []recommendations.RIUtilization{ + {ReservedInstanceID: "ri-1", UtilizationPercent: 50.0}, + }, + } + h := buildReshapeHandler(store, ec2Fake, recsFake) + // Override the unscoped default the helper sets: pretend the + // running AWS account resolves to Tenant A's CloudAccount UUID. + // Equivalent to h.resolveAWSCloudAccountID returning a real match. + h.reshapeAccountResolver = func(_ context.Context) (string, error) { + return accountAID, nil + } + + resp, err := h.getReshapeRecommendations(ctx, reshapeRequest()) + require.NoError(t, err) + body, ok := resp.(*ReshapeRecommendationsResponse) + require.True(t, ok, "response should be a ReshapeRecommendationsResponse") + require.Len(t, body.Recommendations, 1) + rec := body.Recommendations[0] + require.Equal(t, "ri-1", rec.SourceRIID) - // AnalyzeReshapingWithOfferings returns the base recs on lookup - // error, which means AlternativeTargets keeps its name-only - // entries from AnalyzeReshaping (instance_type set, offering_id - // empty, cost zero). - require.NotEmpty(t, rec.AlternativeTargets, "base recs still ship with name-only alternatives") + // Tenant A's c5.large is the only valid alternative; Tenant B's + // r5.large MUST be filtered out by the AccountIDs predicate. + require.Len(t, rec.AlternativeTargets, 1, + "scoped lookup must surface exactly one alternative — Tenant A's c5.large; "+ + "if Tenant B's r5.large appears, the cloud_account_id filter regressed "+ + "and recs are leaking across tenants") + require.Equal(t, "c5.large", rec.AlternativeTargets[0].InstanceType) for _, alt := range rec.AlternativeTargets { - require.NotEmpty(t, alt.InstanceType) - require.Empty(t, alt.OfferingID, "lookup error leaves offering_id blank") - require.Zero(t, alt.EffectiveMonthlyCost, "lookup error leaves cost zero") + require.NotEqual(t, "r5.large", alt.InstanceType, + "r5.large belongs to Tenant B and must never surface for a Tenant-A-scoped request") } } diff --git a/internal/api/handler_test.go b/internal/api/handler_test.go index 93140249..82c1e022 100644 --- a/internal/api/handler_test.go +++ b/internal/api/handler_test.go @@ -3,6 +3,7 @@ package api import ( "context" "encoding/json" + "sync" "testing" "time" @@ -65,6 +66,116 @@ func TestHandler_loadAPIKey_EmptyARN(t *testing.T) { assert.Empty(t, key) } +// TestLoadAPIKey_DoesNotPoisonSharedAWSConfigCache is a regression test +// for CodeRabbit pass-5 finding 1: loadAPIKey used to share +// h.awsCfgOnce/h.awsCfgErr with the request-path identity resolver, so a +// transient cold-start failure (NewHandler runs loadAPIKey under a 5s +// deadline) could permanently seal awsCfgErr for the entire Lambda +// container lifetime — and after the fail-closed STS error propagation +// landed, that meant the multi-tenant reshape scope filter would be +// permanently broken until the container recycled. +// +// The fix (commit 62667c8d2) made loadAPIKey load AWS config locally and +// stop touching the shared sync.Once. This test asserts that logical +// isolation rather than trying to provoke a config-load failure +// (LoadDefaultConfig itself rarely errs — credential failures surface +// later at the SDK call). Two halves: +// +// 1. h.awsCfgErr stays nil regardless of loadAPIKey's outcome — the +// shared error field must not be written at all. +// 2. h.awsCfgOnce is still virgin after loadAPIKey returns. Verified +// by having a sentinel sync.Once.Do fire (Do only runs the func if +// it has not yet been called). If loadAPIKey had used the shared +// Once, our sentinel call would no-op and the test would fail. +func TestLoadAPIKey_DoesNotPoisonSharedAWSConfigCache(t *testing.T) { + // Force a deterministic AWS config environment so the test does not + // depend on the dev machine's ambient credentials. The actual outcome + // of the GetSecretValue call is irrelevant — we only assert that the + // shared cache is left untouched on the way through. We CLEAR rather + // than set fake credential env-vars: setting an `AKIA…` shaped value + // trips the repo's git-secrets pre-commit scanner, and clearing them + // (combined with /dev/null shared-config files) is enough to keep + // LoadDefaultConfig deterministic — it will resolve to "no + // credentials" without hitting any user's ambient profile. + t.Setenv("AWS_REGION", "us-east-1") + t.Setenv("AWS_ACCESS_KEY_ID", "") + t.Setenv("AWS_SECRET_ACCESS_KEY", "") + t.Setenv("AWS_SESSION_TOKEN", "") + // Disable any locally-configured shared config / profile so the SDK + // never tries to read ~/.aws/config in CI. + t.Setenv("AWS_SDK_LOAD_CONFIG", "0") + t.Setenv("AWS_CONFIG_FILE", "/dev/null") + t.Setenv("AWS_SHARED_CREDENTIALS_FILE", "/dev/null") + t.Setenv("AWS_PROFILE", "") + // Disable IMDS so the SDK doesn't probe EC2 instance metadata on + // hosts where it's reachable (CI runners, dev EC2). Without this, + // the credential provider chain can hang for the full IMDS retry + // window before falling through to "no credentials" — making the + // test slow and host-dependent. (CodeRabbit nit on PR #79.) + t.Setenv("AWS_EC2_METADATA_DISABLED", "true") + + cases := []struct { + name string + secretsARN string + }{ + { + name: "empty ARN short-circuits before any AWS work", + secretsARN: "", + }, + { + name: "non-empty ARN goes through awsconfig.LoadDefaultConfig", + // Syntactically valid but unresolvable ARN. The SecretsManager + // call will fail (no network / fake creds), but the failure + // must NOT touch the shared awsCfg* fields. + secretsARN: "arn:aws:secretsmanager:us-east-1:000000000000:secret:nonexistent-XXX", + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + h := &Handler{secretsARN: tc.secretsARN} + + // Bound the call so a hung credential-resolution attempt + // can't stall the test. We don't care about the outcome. + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + _, _ = h.loadAPIKey(ctx) + + // Half 1: the shared error field must be untouched. + assert.NoError(t, h.awsCfgErr, + "loadAPIKey must not write to h.awsCfgErr; doing so poisons the request-path identity resolver for the rest of the handler's lifetime") + + // Half 2: the shared sync.Once must still be virgin. If + // loadAPIKey had called h.awsCfgOnce.Do, our sentinel.Do + // below would no-op and `fired` would stay false. + fired := false + h.awsCfgOnce.Do(func() { fired = true }) + assert.True(t, fired, + "loadAPIKey must not call h.awsCfgOnce.Do; the request-path resolver owns that Once and a transient loadAPIKey failure must not seal it") + + // And nothing we did inside loadAPIKey should have populated + // the cached config — only the request-path resolver does. + // The Region we just set via our own Do above is the test's + // sentinel write, not loadAPIKey's. Compare structurally: + // loadAPIKey would have called LoadDefaultConfig and written + // a fully-populated config; the sentinel branch wrote zero. + assert.Empty(t, h.awsCfg.Region, + "loadAPIKey must not populate h.awsCfg; that field is owned by the request-path resolver") + }) + } + + // Sanity: an independent Once on a fresh handler still works + // normally — i.e. the test's sentinel trick isn't mutating package + // state in a way that breaks the production call site. This guards + // against a future refactor that accidentally shares the Once at + // package scope. + var probe sync.Once + probeFired := false + probe.Do(func() { probeFired = true }) + require.True(t, probeFired, "sanity: a fresh sync.Once must fire on first Do") +} + // Tests moved from handler_router_test.go - these test HandleRequest routing logic func TestHandler_HandleRequest_CORS_Preflight(t *testing.T) { diff --git a/pkg/exchange/reshape.go b/pkg/exchange/reshape.go index d945b2c5..211e2656 100644 --- a/pkg/exchange/reshape.go +++ b/pkg/exchange/reshape.go @@ -29,6 +29,16 @@ type RIInfo struct { // comparisons across currencies. Empty matches the same "skip" // semantics as MonthlyCost == 0. CurrencyCode string + // TermSeconds is the RI commitment term in seconds (31_536_000 for + // 1y, 94_608_000 for 3y) — the same unit the AWS SDK uses on the + // ReservedInstance.Duration field and the unit ec2.ConvertibleRI.Duration + // already carries. Used by the cross-family alternatives pass to + // reject term-mismatched offerings (e.g. surfacing a 3y RI as an + // alternative to a 1y commitment is wrong because the customer's + // existing exchange anchor is the 1y term). Zero means "unknown" — + // the term filter is skipped for that rec to preserve today's + // behaviour for older callers that don't populate it. + TermSeconds int64 } // UtilizationInfo provides utilization data for a specific RI. @@ -41,8 +51,8 @@ type UtilizationInfo struct { // reshape layer: the AWS offering ID, the instance type it provisions, // and the effective monthly cost (amortised fixed price + recurring // hourly charges × 730 hours). Used both as the return shape of -// OfferingLookup (what an AWS-provider closure returns) and as the -// element type of ReshapeRecommendation.AlternativeTargets (what +// PurchaseRecLookup (what the recommendations-store closure returns) and +// as the element type of ReshapeRecommendation.AlternativeTargets (what // reshape emits to the HTTP handler). Kept in pkg/exchange so // pkg/exchange stays AWS-free and providers/aws/services/ec2 imports // the type rather than defining its own. @@ -53,34 +63,51 @@ type OfferingOption struct { // NormalizationFactor is the AWS normalization factor for the // offering's instance size — needed by the local // passesDollarUnitsCheck pre-filter that gates which cross-family - // alternatives surface in the UI. Populated by FindConvertibleOfferings; - // zero on missing means the alternative cannot be safely compared - // and the pre-filter excludes it. + // alternatives surface in the UI. Zero on missing means the + // alternative cannot be safely compared and the pre-filter + // excludes it. NormalizationFactor float64 `json:"normalization_factor,omitempty"` // CurrencyCode is the ISO-4217 currency the EffectiveMonthlyCost is // denominated in (typically "USD"). The pre-filter requires source // and target currencies to match; empty on either side falls back to // "skip the currency guard" so today's USD-only fixtures stay green. CurrencyCode string `json:"currency_code,omitempty"` + // TermSeconds is the offering's commitment term in seconds + // (31_536_000 for 1y, 94_608_000 for 3y) — same unit as + // RIInfo.TermSeconds and the AWS SDK's ReservedInstance.Duration. + // AnalyzeReshapingWithRecs rejects term-mismatched alternatives + // before building AlternativeTargets so a 3y RI never surfaces as + // an alternative to a 1y source commitment (and vice versa). Zero + // on either side falls back to "skip the term guard" for callers + // or fixtures that don't populate it. + TermSeconds int64 `json:"term_seconds,omitempty"` } -// OfferingLookup is the signature of the closure that resolves -// candidate instance types into concrete offerings with pricing. Used -// by AnalyzeReshapingWithOfferings. Implementations batch the request -// into a single DescribeReservedInstancesOfferings call per peer-family -// group so the N instance-types → N API calls fan-out is avoided. -type OfferingLookup func(ctx context.Context, instanceTypes []string) ([]OfferingOption, error) +// PurchaseRecLookup is the signature of the closure that resolves a +// region + currency to the cached AWS Cost Explorer purchase +// recommendations available there. Used by AnalyzeReshapingWithRecs to +// generate cross-family alternatives without per-recommendation AWS API +// calls — the recommendations table is already populated by the +// scheduler tick. Implementations read from store.ListStoredRecommendations +// and map each RecommendationRecord to an OfferingOption (effective +// monthly cost amortised from the upfront and monthly fields). +// +// region is the AWS region the reshape page is viewing; currencyCode is +// the source RIs' currency, propagated onto each returned +// OfferingOption.CurrencyCode so the dollar-units pre-filter can refuse +// cross-currency comparisons cleanly. +type PurchaseRecLookup func(ctx context.Context, region, currencyCode string) ([]OfferingOption, error) // ReshapeRecommendation describes a suggested exchange for an underutilized RI. // -// AlternativeTargets lists cross-family options within the same -// use-case group (general-purpose / compute / memory / burstable) at -// the same target size, enriched with real offering IDs and monthly -// cost when a pricing lookup is available. This is advisory data for -// the UI to surface alongside the primary target; the auto-exchange +// AlternativeTargets lists cross-family options enriched with real +// offering IDs and monthly cost from cached AWS Cost Explorer purchase +// recommendations (see AnalyzeReshapingWithRecs). This is advisory data +// for the UI to surface alongside the primary target; the auto-exchange // pipeline still acts on TargetInstanceType only so existing automated -// behaviour is unchanged. Emitted when the primary target's family -// belongs to a known peer group; empty otherwise. +// behaviour is unchanged. Empty when the base AnalyzeReshaping is used +// directly (auto.go) or when no cached recommendations exist for the +// region. type ReshapeRecommendation struct { SourceRIID string `json:"source_ri_id"` SourceInstanceType string `json:"source_instance_type"` @@ -94,63 +121,6 @@ type ReshapeRecommendation struct { Reason string `json:"reason"` } -// peerFamilyGroups maps each family in the allowlist to the full set -// of peer families within its use-case group. AWS Convertible RIs can -// cross families when the target's $-value units (from -// DescribeReservedInstancesOfferings) match — and the most common -// viable cross-family moves are between same-generation same-use-case -// siblings (e.g. m5 ↔ m6i, c5 ↔ c7g). Suggesting these to the user -// broadens their options without pushing them into shapes that will -// fail the AWS exchange-units check at quote time. -// -// Specialty (GPU / HPC) and legacy-generation families are now also -// included; the new local passesDollarUnitsCheck pre-filter (applied -// in fillAlternativesFromOfferings) drops any alternative whose -// (NormalizationFactor × EffectiveMonthlyCost) wouldn't survive AWS's -// runtime exchange-validity rule, so suggestions stay actionable -// without a per-pair GetReservedInstancesExchangeQuote API call. -// False positives still go through the existing auto.go IsValidExchange -// skip path at execution time. -var peerFamilyGroups = map[string][]string{ - // general-purpose - "m5": {"m5", "m6i", "m7g"}, - "m6i": {"m5", "m6i", "m7g"}, - "m7g": {"m5", "m6i", "m7g"}, - // compute-optimised - "c5": {"c5", "c6i", "c7g"}, - "c6i": {"c5", "c6i", "c7g"}, - "c7g": {"c5", "c6i", "c7g"}, - // memory-optimised - "r5": {"r5", "r6i", "r7g"}, - "r6i": {"r5", "r6i", "r7g"}, - "r7g": {"r5", "r6i", "r7g"}, - // burstable (maps to itself — generation variants are - // typically distinct enough that AWS won't let you exchange - // across them; listed to keep the helper returning a sensible - // result rather than nil for t-family RIs). - "t3": {"t3", "t3a", "t4g"}, - "t3a": {"t3", "t3a", "t4g"}, - "t4g": {"t3", "t3a", "t4g"}, - // Specialty: GPU / inference / HPC. Cross-family within each - // group; the $-units check below filters unviable pairs. - "p3": {"p3", "p4d", "p5"}, - "p4d": {"p3", "p4d", "p5"}, - "p5": {"p3", "p4d", "p5"}, - "g4dn": {"g4dn", "g5"}, - "g5": {"g4dn", "g5"}, - "hpc6a": {"hpc6a", "hpc6id", "hpc7g"}, - "hpc6id": {"hpc6a", "hpc6id", "hpc7g"}, - "hpc7g": {"hpc6a", "hpc6id", "hpc7g"}, - // Legacy generations — useful when operators want to migrate - // off a legacy family to the current generation without - // changing shape entirely. The $-units check makes most legacy - // → current moves viable for the same size. - "m4": {"m4", "m5"}, - "c4": {"c4", "c5"}, - "r3": {"r3", "r4", "r5"}, - "r4": {"r3", "r4", "r5"}, -} - // passesDollarUnitsCheck approximates AWS's exchange-validity rule // locally: a target offering passes if (target.NF × target.EMC) >= // (srcNF × srcMonthlyCost). AWS's actual rule is two parallel @@ -179,14 +149,6 @@ func passesDollarUnitsCheck(srcNF, srcMonthlyCost float64, srcCurrency string, t return target.NormalizationFactor*target.EffectiveMonthlyCost >= srcNF*srcMonthlyCost } -// candidateFamilies returns the peer families (including sourceFamily -// itself) within the same use-case group, or nil when the family is -// not in the allowlist. Callers surface the returned families to users -// as cross-family alternatives. -func candidateFamilies(sourceFamily string) []string { - return peerFamilyGroups[strings.ToLower(sourceFamily)] -} - // normalizationFactors maps EC2 instance sizes to their AWS normalization factors. // See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ri-modification-instancemove.html var normalizationFactors = map[string]float64{ @@ -239,6 +201,10 @@ func NormalizationFactorForSize(size string) float64 { // // threshold is a percentage (0–100) below which an RI is considered underutilized. // For example, threshold=95 means RIs with <95% utilization get recommendations. +// +// Recommendations are emitted with empty AlternativeTargets — alternatives +// are populated only by AnalyzeReshapingWithRecs, which pairs each rec +// against the cached AWS Cost Explorer recommendations. func AnalyzeReshaping(ris []RIInfo, utilization []UtilizationInfo, threshold float64) []ReshapeRecommendation { utilMap := make(map[string]float64, len(utilization)) for _, u := range utilization { @@ -316,12 +282,14 @@ func analyzeRI(ri RIInfo, utilMap map[string]float64, threshold float64) *Reshap } return &ReshapeRecommendation{ - SourceRIID: ri.ID, - SourceInstanceType: ri.InstanceType, - SourceCount: ri.InstanceCount, - TargetInstanceType: targetInstanceType, - TargetCount: targetCount, - AlternativeTargets: alternativesForTarget(family, targetSize), + SourceRIID: ri.ID, + SourceInstanceType: ri.InstanceType, + SourceCount: ri.InstanceCount, + TargetInstanceType: targetInstanceType, + TargetCount: targetCount, + // AlternativeTargets stays nil here; AnalyzeReshapingWithRecs + // fills it from the cached AWS Cost Explorer recommendations. + AlternativeTargets: nil, UtilizationPercent: util, NormalizedUsed: normalizedUsed, NormalizedPurchased: normalizedPurchased, @@ -334,68 +302,48 @@ func analyzeRI(ri RIInfo, utilMap map[string]float64, threshold float64) *Reshap } } -// alternativesForTarget returns cross-family equivalents at the same -// target size for the families in sourceFamily's peer group. The -// source family itself is excluded (that's the primary -// TargetInstanceType already). Returns nil when sourceFamily isn't in -// the allowlist or has no peers. -// -// Pricing fields (OfferingID, EffectiveMonthlyCost) are left empty -// here. AnalyzeReshapingWithOfferings backfills them via an injected -// lookup; callers without a lookup (auto.go, no-pricing tests) see -// name-only entries. -func alternativesForTarget(sourceFamily, targetSize string) []OfferingOption { - peers := candidateFamilies(sourceFamily) - if len(peers) <= 1 { - return nil - } - out := make([]OfferingOption, 0, len(peers)-1) - for _, p := range peers { - if strings.EqualFold(p, sourceFamily) { - continue - } - out = append(out, OfferingOption{InstanceType: p + "." + targetSize}) - } - return out -} - -// AnalyzeReshapingWithOfferings is AnalyzeReshaping + offering -// enrichment: it calls the base analyzer, batches the distinct target -// instance types (primary + alternatives) into ONE lookup call, and -// fills each rec's AlternativeTargets with real OfferingID + -// EffectiveMonthlyCost. +// AnalyzeReshapingWithRecs is AnalyzeReshaping plus cross-family +// alternatives sourced from the cached AWS Cost Explorer purchase +// recommendations table. It calls the base analyzer, then makes a +// SINGLE region-scoped lookup and pairs each rec against the returned +// offerings — no per-recommendation AWS API call, no hand-curated +// peer-family allowlist. // -// Missing-offering behaviour: if the lookup returns no entry for a -// candidate instance type, that alternative is silently dropped from -// the rec's slice — the rec still ships with its primary target and -// the alternatives that DID resolve. If the lookup itself errors, the -// base recs are returned with empty AlternativeTargets across the -// board; the dashboard's primary reshape suggestions stay intact. +// Filtering rules per rec: +// - The source family (parsed from rec.SourceInstanceType) is excluded +// so the alternatives slice carries only cross-family options. +// - The primary target (rec.TargetInstanceType) is also excluded — it +// is already surfaced as the primary suggestion. +// - passesDollarUnitsCheck gates each surviving offering against the +// source RI's NF / MonthlyCost / CurrencyCode so the UI doesn't +// show options that would be rejected at AWS exchange time. When +// the source RI lacks pricing (MonthlyCost == 0) the gate is +// skipped for that rec to preserve today's behaviour for callers +// that don't supply pricing. // -// auto.go keeps calling the existing AnalyzeReshaping (no pricing -// needed); only the HTTP handlers that surface recommendations to -// users call the enriched version. -func AnalyzeReshapingWithOfferings( +// Missing cache, lookup error, or empty-region response: rec ships with +// empty AlternativeTargets — the dashboard's primary reshape suggestion +// stays intact and the UX matches "AWS hasn't recommended anything for +// this region yet". auto.go keeps calling AnalyzeReshaping (no +// alternatives needed); only the HTTP reshape handler calls this enriched +// version. +func AnalyzeReshapingWithRecs( ctx context.Context, ris []RIInfo, utilization []UtilizationInfo, threshold float64, - lookup OfferingLookup, + region, currencyCode string, + lookup PurchaseRecLookup, ) []ReshapeRecommendation { recs := AnalyzeReshaping(ris, utilization, threshold) if lookup == nil || len(recs) == 0 { return recs } - types := distinctCandidateTypes(recs) - if len(types) == 0 { - return recs - } - - offerings, err := lookup(ctx, types) - if err != nil { - // Fall back to name-only alternatives — losing pricing is - // strictly less bad than losing the whole reshape page. + offerings, err := lookup(ctx, region, currencyCode) + if err != nil || len(offerings) == 0 { + // Fall through to base recs — losing alternatives is strictly + // less bad than losing the whole reshape page. return recs } @@ -406,74 +354,29 @@ func AnalyzeReshapingWithOfferings( for _, r := range ris { risByID[r.ID] = r } - fillAlternativesFromOfferings(recs, offerings, risByID) + fillAlternativesFromRecs(recs, offerings, risByID) return recs } -// distinctCandidateTypes collects de-duplicated instance types from all -// recs' primary + alternative targets, sorted for deterministic -// lookups in tests. -func distinctCandidateTypes(recs []ReshapeRecommendation) []string { - want := make(map[string]struct{}) - for _, r := range recs { - if r.TargetInstanceType != "" { - want[r.TargetInstanceType] = struct{}{} - } - for _, alt := range r.AlternativeTargets { - if alt.InstanceType != "" { - want[alt.InstanceType] = struct{}{} - } - } - } - types := make([]string, 0, len(want)) - for t := range want { - types = append(types, t) - } - sort.Strings(types) - return types -} - -// fillAlternativesFromOfferings replaces each rec's AlternativeTargets -// with the matching OfferingOption from the lookup result. Missing -// instance types are silently dropped (per the doc on -// AnalyzeReshapingWithOfferings). -// -// risByID lets the helper apply the local passesDollarUnitsCheck per -// rec — alternatives that wouldn't survive AWS's runtime exchange- -// validity rule are dropped here so the UI doesn't show options that -// would fail at quote time. When the source RI isn't in the map (or -// has zero NF / MonthlyCost), the check is skipped for that rec to -// preserve today's behaviour for callers that don't supply pricing. -// -// The output is sorted ascending by EffectiveMonthlyCost so the UI -// shows cheapest alternatives first — this matches user intent (the -// primary advisory signal of this list is "is there a cheaper option -// than the primary target?") even though it differs from the peer- -// family allowlist order that the base AnalyzeReshaping emits. -func fillAlternativesFromOfferings(recs []ReshapeRecommendation, offerings []OfferingOption, risByID map[string]RIInfo) { - offByType := make(map[string]OfferingOption, len(offerings)) - for _, o := range offerings { - if _, exists := offByType[o.InstanceType]; !exists { - offByType[o.InstanceType] = o - } - } +// fillAlternativesFromRecs is the per-rec body of +// AnalyzeReshapingWithRecs: for each rec it filters the offerings list +// to cross-family options that match the source RI's term and pass the +// dollar-units gate, then sorts them ascending by EffectiveMonthlyCost +// so the cheapest lands first in the UI list. The source family and +// the primary target are excluded so the alternatives slice is +// meaningfully different from the primary suggestion. When source +// pricing or the source/offering term is missing the relevant gate is +// skipped for that rec (NF/MonthlyCost==0 / TermSeconds==0 cases). +func fillAlternativesFromRecs(recs []ReshapeRecommendation, offerings []OfferingOption, risByID map[string]RIInfo) { for i := range recs { src, hasSrc := risByID[recs[i].SourceRIID] - filled := make([]OfferingOption, 0, len(recs[i].AlternativeTargets)) - for _, alt := range recs[i].AlternativeTargets { - found, ok := offByType[alt.InstanceType] - if !ok { + srcFamily, _ := parseInstanceType(recs[i].SourceInstanceType) + filled := make([]OfferingOption, 0, len(offerings)) + for _, off := range offerings { + if !alternativeIsEligible(recs[i], off, src, hasSrc, srcFamily) { continue } - // Apply the pre-filter only when source pricing info is - // available; otherwise keep today's behaviour and pass - // the alternative through untouched. - if hasSrc && src.MonthlyCost > 0 { - if !passesDollarUnitsCheck(src.NormalizationFactor, src.MonthlyCost, src.CurrencyCode, found) { - continue - } - } - filled = append(filled, found) + filled = append(filled, off) } sort.Slice(filled, func(a, b int) bool { return filled[a].EffectiveMonthlyCost < filled[b].EffectiveMonthlyCost @@ -482,6 +385,87 @@ func fillAlternativesFromOfferings(recs []ReshapeRecommendation, offerings []Off } } +// alternativeIsEligible decides whether a single offering should appear +// in the AlternativeTargets slice for the given rec. Pulled out of +// fillAlternativesFromRecs so the per-offering guards stay readable +// (and below the 10-cyclomatic-complexity gate). Returns true only when +// every gate passes: +// +// - Cross-family: the offering must be a different family from the +// source RI; same-family offerings collapse onto the primary +// TargetInstanceType already surfaced. +// - Not the primary target: an "alternative to itself" is no +// alternative. +// - Term match: when both sides report TermSeconds, they must match +// because AWS only allows exchanges within the same term. Either +// side at zero falls back to "skip the gate" so legacy callers +// stay green. +// - $-units: when source pricing is available, the local +// passesDollarUnitsCheck approximation must hold; otherwise the +// gate is skipped. +func alternativeIsEligible(rec ReshapeRecommendation, off OfferingOption, src RIInfo, hasSrc bool, srcFamily string) bool { + if !isCrossFamilyAlternative(off, srcFamily, rec.TargetInstanceType) { + return false + } + if !termMatchesIfKnown(src, off, hasSrc) { + return false + } + if !pricingGatePasses(src, off, hasSrc) { + return false + } + return true +} + +// isCrossFamilyAlternative returns true when the offering is a valid +// cross-family alternative slot — i.e. its family parses, differs from +// the source family, and the offering isn't the same as the primary +// target the rec already surfaces. +func isCrossFamilyAlternative(off OfferingOption, srcFamily, primaryTarget string) bool { + offFamily, _ := parseInstanceType(off.InstanceType) + if offFamily == "" || strings.EqualFold(offFamily, srcFamily) { + return false + } + if off.InstanceType == primaryTarget { + return false + } + return true +} + +// termMatchesIfKnown enforces the term-match guard when both sides +// report TermSeconds. Either side at zero (or no source RI at all) +// returns true so legacy fixtures and older callers keep today's +// behaviour. +func termMatchesIfKnown(src RIInfo, off OfferingOption, hasSrc bool) bool { + if !hasSrc || src.TermSeconds <= 0 || off.TermSeconds <= 0 { + return true + } + return src.TermSeconds == off.TermSeconds +} + +// pricingGatePasses runs the $-units pre-filter when source pricing +// is available. Without source pricing the gate is skipped to preserve +// backwards compatibility for callers that only need the primary-target +// analysis. +// +// Defensive NF fallback: callers occasionally populate MonthlyCost but +// leave NormalizationFactor at zero (e.g. tests, partial RIInfo +// constructions). A zero NF here would reject every alternative even +// when InstanceType is parseable. Derive NF from the instance size when +// it's missing; if the size doesn't match the AWS canonical table, +// NormalizationFactorForSize returns 1.0 — degrades to "no NF +// adjustment" rather than a hard reject. +func pricingGatePasses(src RIInfo, off OfferingOption, hasSrc bool) bool { + if !hasSrc || src.MonthlyCost <= 0 { + return true + } + srcNF := src.NormalizationFactor + if srcNF <= 0 { + _, size := parseInstanceType(src.InstanceType) + srcNF = NormalizationFactorForSize(size) + } + return passesDollarUnitsCheck(srcNF, src.MonthlyCost, src.CurrencyCode, off) +} + // findBestFit finds the instance size and count that best fits normalizedUsed units. // Strategy: find the largest single-instance size that doesn't exceed normalizedUsed, // then round up to that size. This gives a practical 1-instance recommendation. diff --git a/pkg/exchange/reshape_crossfamily_test.go b/pkg/exchange/reshape_crossfamily_test.go index 2365ea51..4b543949 100644 --- a/pkg/exchange/reshape_crossfamily_test.go +++ b/pkg/exchange/reshape_crossfamily_test.go @@ -10,257 +10,355 @@ import ( "github.com/stretchr/testify/require" ) -func TestCandidateFamilies_AllowlistedFamilies(t *testing.T) { +func TestAnalyzeReshaping_BaseEmitsNoAlternatives(t *testing.T) { t.Parallel() - - cases := []struct { - source string - want []string - }{ - // General-purpose peers. - {"m5", []string{"m5", "m6i", "m7g"}}, - {"m6i", []string{"m5", "m6i", "m7g"}}, - {"m7g", []string{"m5", "m6i", "m7g"}}, - // Compute-optimised peers. - {"c5", []string{"c5", "c6i", "c7g"}}, - {"c7g", []string{"c5", "c6i", "c7g"}}, - // Memory-optimised peers. - {"r5", []string{"r5", "r6i", "r7g"}}, - {"r6i", []string{"r5", "r6i", "r7g"}}, - // Burstable peers. - {"t3", []string{"t3", "t3a", "t4g"}}, - // Case insensitivity — operators may supply uppercase. - {"M5", []string{"m5", "m6i", "m7g"}}, - } - for _, c := range cases { - got := candidateFamilies(c.source) - sort.Strings(got) - sort.Strings(c.want) - assert.Equal(t, c.want, got, "unexpected peers for %q", c.source) - } + // The base AnalyzeReshaping no longer emits cross-family + // alternatives — that responsibility now lives in + // AnalyzeReshapingWithRecs, which sources options from the cached + // AWS Cost Explorer recommendations table. The base path's + // AlternativeTargets must always be nil regardless of family so the + // auto.go pipeline (which uses the base path) keeps its current + // "primary target only" contract. + recs := AnalyzeReshaping( + []RIInfo{{ID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible"}}, + []UtilizationInfo{{RIID: "ri-1", UtilizationPercent: 50}}, + 95, + ) + require.Len(t, recs, 1) + assert.Equal(t, "m5.large", recs[0].TargetInstanceType, "primary same-family downsize stays") + assert.Nil(t, recs[0].AlternativeTargets, + "base path emits no alternatives — recs-driven path owns that surface") } -func TestCandidateFamilies_SpecialtyAndLegacyFamilies(t *testing.T) { +func TestAnalyzeReshaping_StandardRIStillSkipped(t *testing.T) { t.Parallel() - // Specialty (GPU/HPC) and legacy-generation families are now in - // the allowlist; the local passesDollarUnitsCheck pre-filter (in - // fillAlternativesFromOfferings) drops unviable cross-family - // pairs so suggestions stay actionable without per-pair AWS quote - // API calls. These groups are fixed in code; the test pins the - // exact membership. - cases := []struct { - source string - want []string - }{ - // GPU - {"p3", []string{"p3", "p4d", "p5"}}, - {"p4d", []string{"p3", "p4d", "p5"}}, - {"g4dn", []string{"g4dn", "g5"}}, - // HPC - {"hpc6a", []string{"hpc6a", "hpc6id", "hpc7g"}}, - // Legacy generations. - {"m4", []string{"m4", "m5"}}, - {"c4", []string{"c4", "c5"}}, - {"r3", []string{"r3", "r4", "r5"}}, - {"r4", []string{"r3", "r4", "r5"}}, - } - for _, c := range cases { - got := candidateFamilies(c.source) - sort.Strings(got) - sort.Strings(c.want) - assert.Equal(t, c.want, got, "unexpected peers for %q", c.source) - } + // Regression guard: the refactor must not start emitting + // recommendations for Standard RIs, which AWS forbids from + // exchanging entirely. + recs := AnalyzeReshaping( + []RIInfo{{ID: "ri-std", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "standard"}}, + []UtilizationInfo{{RIID: "ri-std", UtilizationPercent: 30}}, + 95, + ) + assert.Empty(t, recs, "standard RI must never produce a recommendation") } -func TestCandidateFamilies_UnlistedFamiliesReturnNil(t *testing.T) { +// TestAnalyzeReshapingWithRecs_RecommendationDrivenAlternatives — the +// fake lookup returns offerings spanning m5/c5/r5; an underutilised m5 +// RI should surface c5 + r5 alternatives (cross-family), with the same +// family (m5) excluded so the alternatives slice carries only options +// that differ meaningfully from the primary target. Sort order is +// ascending by EffectiveMonthlyCost — cheapest first. +func TestAnalyzeReshapingWithRecs_RecommendationDrivenAlternatives(t *testing.T) { t.Parallel() - // Truly off-allowlist families: x1 / g4 (note: g4dn IS in the - // allowlist, but plain "g4" without the dn suffix isn't), older - // HPC variants, and the empty string. These return nil — no - // cross-family suggestions surface for the underlying RI. - for _, fam := range []string{"g4", "x1", "hpc7a", ""} { - assert.Nil(t, candidateFamilies(fam), "expected nil for unlisted family %q", fam) + + var callCount int + var gotRegion, gotCurrency string + lookup := func(_ context.Context, region, currency string) ([]OfferingOption, error) { + callCount++ + gotRegion, gotCurrency = region, currency + return []OfferingOption{ + {InstanceType: "m5.large", OfferingID: "off-m5", EffectiveMonthlyCost: 40.0, NormalizationFactor: 4, CurrencyCode: "USD"}, + {InstanceType: "c5.large", OfferingID: "off-c5", EffectiveMonthlyCost: 50.0, NormalizationFactor: 4, CurrencyCode: "USD"}, + {InstanceType: "r5.large", OfferingID: "off-r5", EffectiveMonthlyCost: 60.0, NormalizationFactor: 4, CurrencyCode: "USD"}, + }, nil } -} -func TestAnalyzeReshaping_EmitsCrossFamilyAlternatives(t *testing.T) { - t.Parallel() - // m5.xlarge at 50% → primary target m5.large; alternatives - // should list m6i.large and m7g.large (same peer group, same - // target size, source family excluded). - recs := AnalyzeReshaping( - []RIInfo{{ID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible"}}, + recs := AnalyzeReshapingWithRecs( + context.Background(), + []RIInfo{{ + ID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible", + NormalizationFactor: 8, MonthlyCost: 25, CurrencyCode: "USD", // src units = 200 + }}, []UtilizationInfo{{RIID: "ri-1", UtilizationPercent: 50}}, 95, + "us-east-1", "USD", + lookup, ) - if len(recs) != 1 { - t.Fatalf("expected 1 recommendation; got %d", len(recs)) - } + require.Len(t, recs, 1) + assert.Equal(t, 1, callCount, "lookup must be called exactly once per reshape pass") + assert.Equal(t, "us-east-1", gotRegion, "region must be threaded through") + assert.Equal(t, "USD", gotCurrency, "currency must be threaded through") + got := recs[0] - assert.Equal(t, "m5.large", got.TargetInstanceType) + assert.Equal(t, "m5.large", got.TargetInstanceType, "primary downsize unchanged") gotAlts := make([]string, 0, len(got.AlternativeTargets)) for _, alt := range got.AlternativeTargets { gotAlts = append(gotAlts, alt.InstanceType) } sort.Strings(gotAlts) - assert.Equal(t, []string{"m6i.large", "m7g.large"}, gotAlts, - "peer families should be suggested at the same target size, source family excluded") - // No lookup injected at this call site → pricing fields stay zero. - for _, alt := range got.AlternativeTargets { - assert.Empty(t, alt.OfferingID, "base AnalyzeReshaping does not resolve offering IDs") - assert.Zero(t, alt.EffectiveMonthlyCost, "base AnalyzeReshaping does not resolve prices") - } + assert.Equal(t, []string{"c5.large", "r5.large"}, gotAlts, + "cross-family alternatives must surface; same-family m5 must be excluded") + // Ascending by EffectiveMonthlyCost: c5 ($50) before r5 ($60). + require.Len(t, got.AlternativeTargets, 2) + assert.Equal(t, "c5.large", got.AlternativeTargets[0].InstanceType) + assert.Equal(t, "off-c5", got.AlternativeTargets[0].OfferingID) + assert.InDelta(t, 50.0, got.AlternativeTargets[0].EffectiveMonthlyCost, 0.001) + assert.Equal(t, "r5.large", got.AlternativeTargets[1].InstanceType) + assert.InDelta(t, 60.0, got.AlternativeTargets[1].EffectiveMonthlyCost, 0.001) } -func TestAnalyzeReshaping_NoAlternativesForUnlistedFamily(t *testing.T) { +// TestAnalyzeReshapingWithRecs_EmptyLookupReturnsNoAlternatives — cold +// cache / no recs in the region: the rec ships with its primary target +// and an empty AlternativeTargets slice. UI matches "AWS hasn't +// recommended anything for this region yet". +func TestAnalyzeReshapingWithRecs_EmptyLookupReturnsNoAlternatives(t *testing.T) { t.Parallel() - // "x1" is not in the allowlist, so no cross-family alternatives - // are emitted at the base layer. We still emit the same-family - // primary reshape — name-only alternatives only get filled in - // when the family has peers in the group. - recs := AnalyzeReshaping( - []RIInfo{{ID: "ri-x", InstanceType: "x1.16xlarge", InstanceCount: 1, OfferingClass: "convertible"}}, - []UtilizationInfo{{RIID: "ri-x", UtilizationPercent: 40}}, + lookup := func(_ context.Context, _, _ string) ([]OfferingOption, error) { + return nil, nil + } + recs := AnalyzeReshapingWithRecs( + context.Background(), + []RIInfo{{ID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible"}}, + []UtilizationInfo{{RIID: "ri-1", UtilizationPercent: 50}}, 95, + "us-east-1", "USD", + lookup, ) - if len(recs) != 1 { - t.Fatalf("expected 1 recommendation; got %d", len(recs)) - } - assert.Nil(t, recs[0].AlternativeTargets, - "unlisted families must not get cross-family alternatives") + require.Len(t, recs, 1) + assert.Equal(t, "m5.large", recs[0].TargetInstanceType) + assert.Empty(t, recs[0].AlternativeTargets, + "empty cache → no alternatives but primary target still surfaced") } -func TestAnalyzeReshaping_StandardRIStillSkipped(t *testing.T) { +// TestAnalyzeReshapingWithRecs_AppliesDollarUnitsFilter — alternatives +// that would fail AWS's $-units exchange check are dropped before +// reaching the UI, matching the existing behaviour of the local +// pre-filter. Source pricing must be supplied (NF + MonthlyCost) for +// the gate to engage. +func TestAnalyzeReshapingWithRecs_AppliesDollarUnitsFilter(t *testing.T) { t.Parallel() - // Regression guard: adding cross-family suggestions must not - // start emitting recommendations for Standard RIs, which AWS - // forbids from exchanging entirely. - recs := AnalyzeReshaping( - []RIInfo{{ID: "ri-std", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "standard"}}, - []UtilizationInfo{{RIID: "ri-std", UtilizationPercent: 30}}, + + lookup := func(_ context.Context, _, _ string) ([]OfferingOption, error) { + return []OfferingOption{ + // c5.large priced too cheap to satisfy the units check + // (4 × 5 = 20 < src 200) — should be filtered out. + {InstanceType: "c5.large", OfferingID: "off-c5", EffectiveMonthlyCost: 5.0, NormalizationFactor: 4, CurrencyCode: "USD"}, + // r5.large priced enough to pass (4 × 50 = 200 ≥ 200). + {InstanceType: "r5.large", OfferingID: "off-r5", EffectiveMonthlyCost: 50.0, NormalizationFactor: 4, CurrencyCode: "USD"}, + }, nil + } + + recs := AnalyzeReshapingWithRecs( + context.Background(), + []RIInfo{{ + ID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible", + NormalizationFactor: 8, MonthlyCost: 25, CurrencyCode: "USD", // src units = 200 + }}, + []UtilizationInfo{{RIID: "ri-1", UtilizationPercent: 50}}, 95, + "us-east-1", "USD", + lookup, ) - assert.Empty(t, recs, "standard RI must never produce a recommendation") + require.Len(t, recs, 1) + alts := recs[0].AlternativeTargets + require.Len(t, alts, 1, "only r5.large should pass; c5.large gets filtered for failing the $-units check") + assert.Equal(t, "r5.large", alts[0].InstanceType) } -func TestAnalyzeReshapingWithOfferings_EnrichesAlternatives(t *testing.T) { +// TestAnalyzeReshapingWithRecs_NoSourcePricingSkipsFilter — when the +// caller doesn't populate RIInfo.MonthlyCost (zero), the filter is +// skipped entirely and every cross-family offering passes through. +// Pins backwards compatibility for older callers that don't compute +// per-RI monthly cost. +func TestAnalyzeReshapingWithRecs_NoSourcePricingSkipsFilter(t *testing.T) { t.Parallel() - // m5.xlarge at 50% → primary m5.large + alternatives m6i.large, - // m7g.large. Lookup returns pricing for all three plus an extra - // unrelated offering that should be ignored. Lookup is called - // exactly once with the de-duplicated instance-type set. - var gotTypes []string - var callCount int - lookup := func(ctx context.Context, instanceTypes []string) ([]OfferingOption, error) { - callCount++ - gotTypes = append([]string{}, instanceTypes...) + lookup := func(_ context.Context, _, _ string) ([]OfferingOption, error) { return []OfferingOption{ - {InstanceType: "m5.large", OfferingID: "off-m5", EffectiveMonthlyCost: 40.0}, - {InstanceType: "m6i.large", OfferingID: "off-m6i", EffectiveMonthlyCost: 35.0}, - {InstanceType: "m7g.large", OfferingID: "off-m7g", EffectiveMonthlyCost: 30.0}, - {InstanceType: "irrelevant.size", OfferingID: "off-x", EffectiveMonthlyCost: 999.0}, + // Would normally be dropped if the filter ran, but no source + // pricing means we keep today's "show every cross-family match" + // behaviour. + {InstanceType: "c5.large", OfferingID: "off-c5", EffectiveMonthlyCost: 5.0, NormalizationFactor: 4}, + {InstanceType: "r5.large", OfferingID: "off-r5", EffectiveMonthlyCost: 50.0, NormalizationFactor: 4}, }, nil } - - recs := AnalyzeReshapingWithOfferings( + recs := AnalyzeReshapingWithRecs( context.Background(), + // Older RIInfo shape: no MonthlyCost / CurrencyCode populated. []RIInfo{{ID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible"}}, []UtilizationInfo{{RIID: "ri-1", UtilizationPercent: 50}}, 95, + "us-east-1", "", lookup, ) - if len(recs) != 1 { - t.Fatalf("expected 1 rec; got %d", len(recs)) - } - assert.Equal(t, 1, callCount, "lookup must be called exactly once (batched)") - - sort.Strings(gotTypes) - assert.Equal(t, []string{"m5.large", "m6i.large", "m7g.large"}, gotTypes, - "lookup should receive de-duplicated instance types") - - got := recs[0] - assert.Equal(t, "m5.large", got.TargetInstanceType) - if len(got.AlternativeTargets) != 2 { - t.Fatalf("expected 2 alternatives; got %+v", got.AlternativeTargets) - } - // AnalyzeReshapingWithOfferings sorts alternatives ascending by - // EffectiveMonthlyCost so the cheapest option is first. Lookup - // returned m6i.large=$35 and m7g.large=$30 → m7g first. - assert.Equal(t, "m7g.large", got.AlternativeTargets[0].InstanceType) - assert.Equal(t, "off-m7g", got.AlternativeTargets[0].OfferingID) - assert.InDelta(t, 30.0, got.AlternativeTargets[0].EffectiveMonthlyCost, 0.001) - assert.Equal(t, "m6i.large", got.AlternativeTargets[1].InstanceType) - assert.InDelta(t, 35.0, got.AlternativeTargets[1].EffectiveMonthlyCost, 0.001) + require.Len(t, recs, 1) + require.Len(t, recs[0].AlternativeTargets, 2, + "with no source pricing, both cross-family alternatives must remain visible") } -func TestAnalyzeReshapingWithOfferings_MissingOfferingDroppedNotWholeRec(t *testing.T) { +// TestAnalyzeReshapingWithRecs_ExcludesPrimaryTarget — when the lookup +// returns an offering whose InstanceType matches the rec's primary +// TargetInstanceType, that offering is excluded from AlternativeTargets +// because it isn't an alternative to itself; it's the same suggestion. +// The cross-family offerings that remain are surfaced as before. +func TestAnalyzeReshapingWithRecs_ExcludesPrimaryTarget(t *testing.T) { t.Parallel() - // AWS doesn't offer m7g.large in this region. The lookup returns - // only m5.large + m6i.large. The rec should still ship with - // m5.large as primary and m6i.large as the only alternative; - // m7g.large is silently dropped. - lookup := func(ctx context.Context, _ []string) ([]OfferingOption, error) { + lookup := func(_ context.Context, _, _ string) ([]OfferingOption, error) { return []OfferingOption{ - {InstanceType: "m5.large", OfferingID: "off-m5", EffectiveMonthlyCost: 40.0}, - {InstanceType: "m6i.large", OfferingID: "off-m6i", EffectiveMonthlyCost: 35.0}, + // Same family + same size as the primary target — must be + // excluded by the same-family guard regardless. + {InstanceType: "m5.large", OfferingID: "off-m5", EffectiveMonthlyCost: 40.0, NormalizationFactor: 4}, + {InstanceType: "c5.large", OfferingID: "off-c5", EffectiveMonthlyCost: 50.0, NormalizationFactor: 4}, }, nil } - recs := AnalyzeReshapingWithOfferings( + recs := AnalyzeReshapingWithRecs( context.Background(), []RIInfo{{ID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible"}}, []UtilizationInfo{{RIID: "ri-1", UtilizationPercent: 50}}, 95, + "us-east-1", "", lookup, ) - if len(recs) != 1 { - t.Fatalf("expected rec to still ship; got %d", len(recs)) - } - if len(recs[0].AlternativeTargets) != 1 || recs[0].AlternativeTargets[0].InstanceType != "m6i.large" { - t.Fatalf("expected only m6i.large to survive; got %+v", recs[0].AlternativeTargets) - } + require.Len(t, recs, 1) + require.Len(t, recs[0].AlternativeTargets, 1) + assert.Equal(t, "c5.large", recs[0].AlternativeTargets[0].InstanceType) } -func TestAnalyzeReshapingWithOfferings_LookupErrorFallsBackToBaseRecs(t *testing.T) { +// TestAnalyzeReshapingWithRecs_LookupErrorFallsBackToBaseRecs — Cost +// Explorer cache read fails: return the base recs (primary target +// intact, empty alternatives). Losing alternatives is strictly less +// bad than losing the whole reshape page. +func TestAnalyzeReshapingWithRecs_LookupErrorFallsBackToBaseRecs(t *testing.T) { t.Parallel() - // Cost Explorer 5xx → return base recs with empty - // AlternativeTargets (primary target still shown). Losing pricing - // is strictly less bad than losing the whole reshape page. - lookup := func(ctx context.Context, _ []string) ([]OfferingOption, error) { - return nil, fmt.Errorf("api call failed") + lookup := func(_ context.Context, _, _ string) ([]OfferingOption, error) { + return nil, fmt.Errorf("postgres timeout") } - recs := AnalyzeReshapingWithOfferings( + recs := AnalyzeReshapingWithRecs( context.Background(), []RIInfo{{ID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible"}}, []UtilizationInfo{{RIID: "ri-1", UtilizationPercent: 50}}, 95, + "us-east-1", "USD", lookup, ) - if len(recs) != 1 { - t.Fatalf("expected 1 rec; got %d", len(recs)) - } + require.Len(t, recs, 1) assert.Equal(t, "m5.large", recs[0].TargetInstanceType) - // Base recs had name-only alternatives; lookup failed so those - // are preserved as-is (not silently cleared). The handler can - // still render instance-type chips even without pricing. - assert.NotEmpty(t, recs[0].AlternativeTargets, - "lookup error should leave the name-only alternatives intact") + assert.Empty(t, recs[0].AlternativeTargets, + "lookup error → no alternatives, primary target still ships") } -func TestAnalyzeReshapingWithOfferings_NilLookupUsesBaseRecs(t *testing.T) { +// TestAnalyzeReshapingWithRecs_NilLookupUsesBaseRecs — defensive: nil +// lookup should not panic; the base recs flow through unchanged. +func TestAnalyzeReshapingWithRecs_NilLookupUsesBaseRecs(t *testing.T) { t.Parallel() - recs := AnalyzeReshapingWithOfferings( + recs := AnalyzeReshapingWithRecs( context.Background(), []RIInfo{{ID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible"}}, []UtilizationInfo{{RIID: "ri-1", UtilizationPercent: 50}}, 95, + "us-east-1", "USD", nil, ) - if len(recs) != 1 { - t.Fatalf("expected 1 rec; got %d", len(recs)) + require.Len(t, recs, 1) + assert.Equal(t, "m5.large", recs[0].TargetInstanceType) + assert.Empty(t, recs[0].AlternativeTargets) +} + +// TestAnalyzeReshapingWithRecs_LegacyFamilyM4GeneratesAlternatives — +// post-refactor, "legacy family" support is no longer hand-curated: +// any cross-family offering returned by the cached recs surfaces as +// long as it passes the dollar-units check. An underutilised m4 RI +// paired against c5 / r5 / m5 recs surfaces all three (m5 because it +// is a different family from m4 and the gate accepts). +func TestAnalyzeReshapingWithRecs_LegacyFamilyM4GeneratesAlternatives(t *testing.T) { + t.Parallel() + lookup := func(_ context.Context, _, _ string) ([]OfferingOption, error) { + return []OfferingOption{ + {InstanceType: "m5.large", OfferingID: "off-m5l", EffectiveMonthlyCost: 60.0, NormalizationFactor: 4, CurrencyCode: "USD"}, + {InstanceType: "c5.large", OfferingID: "off-c5l", EffectiveMonthlyCost: 70.0, NormalizationFactor: 4, CurrencyCode: "USD"}, + }, nil } - assert.NotEmpty(t, recs[0].AlternativeTargets, - "nil lookup should still leave the name-only alternatives in place") + recs := AnalyzeReshapingWithRecs( + context.Background(), + []RIInfo{{ + ID: "ri-m4", InstanceType: "m4.xlarge", InstanceCount: 1, OfferingClass: "convertible", + NormalizationFactor: 8, MonthlyCost: 25, CurrencyCode: "USD", // src units = 200 + }}, + []UtilizationInfo{{RIID: "ri-m4", UtilizationPercent: 50}}, + 95, + "us-east-1", "USD", + lookup, + ) + require.Len(t, recs, 1) + got := recs[0] + assert.Equal(t, "m4.large", got.TargetInstanceType) + require.Len(t, got.AlternativeTargets, 2, + "both m5.large and c5.large should pass the $-units check") + // Ascending by EffectiveMonthlyCost. + assert.Equal(t, "m5.large", got.AlternativeTargets[0].InstanceType) + assert.Equal(t, "c5.large", got.AlternativeTargets[1].InstanceType) +} + +// TestAnalyzeReshapingWithRecs_TermMismatchedAlternativesFiltered pins +// the term-match guard: a 1y source RI must not see 3y alternatives in +// AlternativeTargets (and vice versa) because AWS only allows exchanges +// within the same term. Both sides populate TermSeconds — the guard +// rejects the mismatched offering before AlternativeTargets is built. +const oneYearSeconds = int64(365 * 24 * 60 * 60) +const threeYearSeconds = 3 * oneYearSeconds + +func TestAnalyzeReshapingWithRecs_TermMismatchedAlternativesFiltered(t *testing.T) { + t.Parallel() + lookup := func(_ context.Context, _, _ string) ([]OfferingOption, error) { + return []OfferingOption{ + // 1y c5.large — same term as the source, must surface. + {InstanceType: "c5.large", OfferingID: "off-c5-1y", EffectiveMonthlyCost: 50, NormalizationFactor: 4, CurrencyCode: "USD", TermSeconds: oneYearSeconds}, + // 3y r5.large — term mismatch, must be filtered out even + // though it would otherwise pass the $-units check. + {InstanceType: "r5.large", OfferingID: "off-r5-3y", EffectiveMonthlyCost: 60, NormalizationFactor: 4, CurrencyCode: "USD", TermSeconds: threeYearSeconds}, + }, nil + } + recs := AnalyzeReshapingWithRecs( + context.Background(), + []RIInfo{{ + ID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible", + NormalizationFactor: 8, MonthlyCost: 25, CurrencyCode: "USD", + TermSeconds: oneYearSeconds, // 1y source + }}, + []UtilizationInfo{{RIID: "ri-1", UtilizationPercent: 50}}, + 95, + "us-east-1", "USD", + lookup, + ) + require.Len(t, recs, 1) + require.Len(t, recs[0].AlternativeTargets, 1, + "only the 1y alternative survives; the 3y r5.large must be filtered as term-mismatched") + assert.Equal(t, "c5.large", recs[0].AlternativeTargets[0].InstanceType) + assert.Equal(t, oneYearSeconds, recs[0].AlternativeTargets[0].TermSeconds, + "surviving alternative must carry the source's term so the UI can label it") +} + +// TestAnalyzeReshapingWithRecs_TermZeroSkipsTermGuard pins the +// backwards-compat path: when either the source or the offering omits +// TermSeconds, the guard does not fire and the alternative passes +// through (subject to the other gates). Mirrors the existing +// MonthlyCost==0 / CurrencyCode=="" skip semantics. +func TestAnalyzeReshapingWithRecs_TermZeroSkipsTermGuard(t *testing.T) { + t.Parallel() + lookup := func(_ context.Context, _, _ string) ([]OfferingOption, error) { + return []OfferingOption{ + // 3y offering, but the source has no term info — the term + // gate stays disabled so the offering surfaces. + {InstanceType: "r5.large", OfferingID: "off-r5-3y", EffectiveMonthlyCost: 60, NormalizationFactor: 4, CurrencyCode: "USD", TermSeconds: threeYearSeconds}, + }, nil + } + recs := AnalyzeReshapingWithRecs( + context.Background(), + []RIInfo{{ + ID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible", + NormalizationFactor: 8, MonthlyCost: 25, CurrencyCode: "USD", + // TermSeconds intentionally zero — older callers / fixtures. + }}, + []UtilizationInfo{{RIID: "ri-1", UtilizationPercent: 50}}, + 95, + "us-east-1", "USD", + lookup, + ) + require.Len(t, recs, 1) + require.Len(t, recs[0].AlternativeTargets, 1, + "source TermSeconds==0 must skip the term gate so today's behaviour is preserved") + assert.Equal(t, "r5.large", recs[0].AlternativeTargets[0].InstanceType) } // TestPassesDollarUnitsCheck pins the local pre-filter rule that gates @@ -348,99 +446,3 @@ func TestPassesDollarUnitsCheck(t *testing.T) { }) } } - -// TestAnalyzeReshapingWithOfferings_LegacyFamilyM4GeneratesM5Alternative -// — given an underutilised m4 RI, the new legacy-family entry in -// peerFamilyGroups + the dollar-units pre-filter should surface m5 as -// an alternative when its (NF × EMC) is at least the source's units. -func TestAnalyzeReshapingWithOfferings_LegacyFamilyM4GeneratesM5Alternative(t *testing.T) { - t.Parallel() - - lookup := func(_ context.Context, _ []string) ([]OfferingOption, error) { - return []OfferingOption{ - // m4.large primary: NF=4, EMC=$50 → 200 units (matches src 200, passes the boundary). - {InstanceType: "m4.large", OfferingID: "off-m4l", EffectiveMonthlyCost: 50.0, NormalizationFactor: 4}, - // m5.large alternative: NF=4, EMC=$60 → 240 units (>= src 200 → passes). - {InstanceType: "m5.large", OfferingID: "off-m5l", EffectiveMonthlyCost: 60.0, NormalizationFactor: 4}, - }, nil - } - - recs := AnalyzeReshapingWithOfferings( - context.Background(), - []RIInfo{{ - ID: "ri-m4", InstanceType: "m4.xlarge", InstanceCount: 1, OfferingClass: "convertible", - NormalizationFactor: 8, MonthlyCost: 25, // src units = 200 - }}, - []UtilizationInfo{{RIID: "ri-m4", UtilizationPercent: 50}}, - 95, - lookup, - ) - require.Len(t, recs, 1) - got := recs[0] - assert.Equal(t, "m4.large", got.TargetInstanceType) - require.Len(t, got.AlternativeTargets, 1, "m5.large should pass the $-units check") - assert.Equal(t, "m5.large", got.AlternativeTargets[0].InstanceType) -} - -// TestAnalyzeReshapingWithOfferings_DollarUnitsFilterDropsUnviable -// — when an alternative would fail AWS's $-units check, the local -// pre-filter excludes it from the rec's AlternativeTargets so the UI -// doesn't show a suggestion that would be rejected at exchange time. -func TestAnalyzeReshapingWithOfferings_DollarUnitsFilterDropsUnviable(t *testing.T) { - t.Parallel() - - lookup := func(_ context.Context, _ []string) ([]OfferingOption, error) { - return []OfferingOption{ - // Primary m5.large stays untouched (it's not an "alternative"). - {InstanceType: "m5.large", OfferingID: "off-m5l", EffectiveMonthlyCost: 50.0, NormalizationFactor: 4}, - // m6i.large priced too cheap to satisfy the units check — should be filtered out. - {InstanceType: "m6i.large", OfferingID: "off-m6il", EffectiveMonthlyCost: 5.0, NormalizationFactor: 4}, - // m7g.large priced enough to pass. - {InstanceType: "m7g.large", OfferingID: "off-m7gl", EffectiveMonthlyCost: 50.0, NormalizationFactor: 4}, - }, nil - } - - recs := AnalyzeReshapingWithOfferings( - context.Background(), - []RIInfo{{ - ID: "ri-m5", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible", - NormalizationFactor: 8, MonthlyCost: 25, // src units = 200 - }}, - []UtilizationInfo{{RIID: "ri-m5", UtilizationPercent: 50}}, - 95, - lookup, - ) - require.Len(t, recs, 1) - alts := recs[0].AlternativeTargets - require.Len(t, alts, 1, "only m7g.large should pass; m6i.large gets filtered for failing the $-units check") - assert.Equal(t, "m7g.large", alts[0].InstanceType) -} - -// TestAnalyzeReshapingWithOfferings_NoSourcePricingSkipsFilter — when -// the caller doesn't populate RIInfo.MonthlyCost (zero), the filter is -// skipped entirely and behaviour matches today's "name + offering" -// shape. Pins backwards compatibility for older callers. -func TestAnalyzeReshapingWithOfferings_NoSourcePricingSkipsFilter(t *testing.T) { - t.Parallel() - - lookup := func(_ context.Context, _ []string) ([]OfferingOption, error) { - return []OfferingOption{ - {InstanceType: "m5.large", OfferingID: "off-m5l", EffectiveMonthlyCost: 50.0, NormalizationFactor: 4}, - // Would normally be dropped if the filter ran, but no source - // pricing means we keep today's "show every match" behaviour. - {InstanceType: "m6i.large", OfferingID: "off-m6il", EffectiveMonthlyCost: 5.0, NormalizationFactor: 4}, - {InstanceType: "m7g.large", OfferingID: "off-m7gl", EffectiveMonthlyCost: 50.0, NormalizationFactor: 4}, - }, nil - } - - recs := AnalyzeReshapingWithOfferings( - context.Background(), - // Older RIInfo shape: no MonthlyCost / CurrencyCode populated. - []RIInfo{{ID: "ri-m5", InstanceType: "m5.xlarge", InstanceCount: 1, OfferingClass: "convertible"}}, - []UtilizationInfo{{RIID: "ri-m5", UtilizationPercent: 50}}, - 95, - lookup, - ) - require.Len(t, recs, 1) - require.Len(t, recs[0].AlternativeTargets, 2, "with no source pricing, both alternatives must remain visible") -} diff --git a/providers/aws/services/ec2/client.go b/providers/aws/services/ec2/client.go index 6d6ded7d..95603460 100644 --- a/providers/aws/services/ec2/client.go +++ b/providers/aws/services/ec2/client.go @@ -457,95 +457,6 @@ type FindConvertibleOfferingParams struct { Duration int64 } -// FindConvertibleOfferings enumerates convertible RI offerings for -// every instance type in instanceTypes in a single -// DescribeReservedInstancesOfferings call (via a multi-value -// instance-type filter). Returns one OfferingOption per instance type -// that has at least one matching offering, sorted ascending by -// EffectiveMonthlyCost. Missing instance types (no matching offering -// in the region) are silently dropped — the caller treats that as -// "offer not available" rather than an error. -// -// Cost formula (matches AWS Reserved Instances console): -// -// monthly = (FixedPrice / hours_per_term + UsagePrice + Σ RecurringCharges[].Amount) × 730 -// hours_per_term = Duration_seconds / 3600 -// 730 = AWS's canonical hours-per-month -// -// Defaults mirror FindConvertibleOffering: Linux/UNIX, default tenancy, -// Region scope, 1 year term. -func (c *Client) FindConvertibleOfferings(ctx context.Context, instanceTypes []string) ([]exchange.OfferingOption, error) { - if len(instanceTypes) == 0 { - return nil, nil - } - duration := OneYearSeconds - filters := []types.Filter{ - {Name: aws.String("instance-type"), Values: instanceTypes}, - {Name: aws.String("product-description"), Values: []string{"Linux/UNIX"}}, - {Name: aws.String("instance-tenancy"), Values: []string{"default"}}, - {Name: aws.String("scope"), Values: []string{"Region"}}, - {Name: aws.String("duration"), Values: []string{fmt.Sprintf("%d", duration)}}, - {Name: aws.String("offering-class"), Values: []string{"convertible"}}, - } - input := &ec2.DescribeReservedInstancesOfferingsInput{ - Filters: filters, - IncludeMarketplace: aws.Bool(false), - MaxResults: aws.Int32(100), - } - result, err := c.client.DescribeReservedInstancesOfferings(ctx, input) - if err != nil { - return nil, fmt.Errorf("failed to describe convertible offerings: %w", err) - } - - // Pick the cheapest offering per instance type: AWS may return - // multiple offerings per (instance-type, duration, tenancy) tuple - // that differ only by payment option (all-upfront / partial / - // no-upfront); we want the one with the lowest effective monthly - // cost so the user's "alternative" comparison is apples-to-apples. - bestByType := make(map[string]exchange.OfferingOption) - for _, o := range result.ReservedInstancesOfferings { - instanceType := string(o.InstanceType) - cost := effectiveMonthlyCost(o) - if existing, ok := bestByType[instanceType]; ok && existing.EffectiveMonthlyCost <= cost { - continue - } - bestByType[instanceType] = exchange.OfferingOption{ - InstanceType: instanceType, - OfferingID: aws.ToString(o.ReservedInstancesOfferingId), - EffectiveMonthlyCost: cost, - NormalizationFactor: normalizationFactorForInstanceType(instanceType), - CurrencyCode: string(o.CurrencyCode), - } - } - - out := make([]exchange.OfferingOption, 0, len(bestByType)) - for _, o := range bestByType { - out = append(out, o) - } - sort.Slice(out, func(i, j int) bool { - return out[i].EffectiveMonthlyCost < out[j].EffectiveMonthlyCost - }) - return out, nil -} - -// effectiveMonthlyCost computes the monthly cost AWS's RI console -// displays. All three inputs (FixedPrice, UsagePrice, RecurringCharges) -// can be absent depending on the payment option — treat missing as 0. -func effectiveMonthlyCost(o types.ReservedInstancesOffering) float64 { - hoursPerTerm := float64(aws.ToInt64(o.Duration)) / 3600.0 - if hoursPerTerm <= 0 { - hoursPerTerm = float64(OneYearSeconds) / 3600.0 - } - fixed := float64(aws.ToFloat32(o.FixedPrice)) - usage := float64(aws.ToFloat32(o.UsagePrice)) - var recurring float64 - for _, rc := range o.RecurringCharges { - recurring += aws.ToFloat64(rc.Amount) - } - hourly := fixed/hoursPerTerm + usage + recurring - return hourly * 730.0 -} - // FindConvertibleOffering finds a convertible RI offering ID for the given parameters. func (c *Client) FindConvertibleOffering(ctx context.Context, params FindConvertibleOfferingParams) (string, error) { tenancy := params.Tenancy diff --git a/providers/aws/services/ec2/client_offerings_test.go b/providers/aws/services/ec2/client_offerings_test.go deleted file mode 100644 index f2f71e41..00000000 --- a/providers/aws/services/ec2/client_offerings_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package ec2 - -import ( - "context" - "testing" - - "github.com/aws/aws-sdk-go-v2/aws" - "github.com/aws/aws-sdk-go-v2/service/ec2" - "github.com/aws/aws-sdk-go-v2/service/ec2/types" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" -) - -// TestFindConvertibleOfferings_ReturnsRankedByEffectiveCost verifies that -// FindConvertibleOfferings batches its call, picks the cheapest offering -// per instance type, and sorts ascending by EffectiveMonthlyCost. -func TestFindConvertibleOfferings_ReturnsRankedByEffectiveCost(t *testing.T) { - t.Parallel() - mockEC2 := new(MockEC2Client) - client := &Client{client: mockEC2, region: "us-east-1"} - - // AWS returns 4 offerings: 2 for m5.large at different payment - // options, 1 for m6i.large, 1 for m7g.large. The method must - // collapse the two m5.large rows to the cheaper of the two and - // sort the result ascending by monthly cost. - // - // Costs chosen so sort order is m7g (cheapest) → m6i → m5 cheap - // variant → dropped m5 expensive variant. With 1-year term - // (31536000s = 8760 hours) and 730 hrs/month: - // m5.large cheap: FixedPrice=0, Usage=0.04 → 0.04 * 730 = 29.20 - // m5.large dear: FixedPrice=876, Usage=0 → 876/8760 * 730 = 73.00 - // m6i.large: FixedPrice=0, Usage=0.03 → 21.90 - // m7g.large: FixedPrice=0, Usage=0.02 → 14.60 - mockEC2.On("DescribeReservedInstancesOfferings", mock.Anything, - mock.MatchedBy(func(in *ec2.DescribeReservedInstancesOfferingsInput) bool { - // Assert the filter carries all three instance types in one call. - for _, f := range in.Filters { - if aws.ToString(f.Name) == "instance-type" { - if len(f.Values) != 3 { - return false - } - } - } - return true - })).Return(&ec2.DescribeReservedInstancesOfferingsOutput{ - ReservedInstancesOfferings: []types.ReservedInstancesOffering{ - { - ReservedInstancesOfferingId: aws.String("off-m5-cheap"), - InstanceType: "m5.large", - Duration: aws.Int64(OneYearSeconds), - FixedPrice: aws.Float32(0), - UsagePrice: aws.Float32(0.04), - }, - { - ReservedInstancesOfferingId: aws.String("off-m5-dear"), - InstanceType: "m5.large", - Duration: aws.Int64(OneYearSeconds), - FixedPrice: aws.Float32(876), - UsagePrice: aws.Float32(0), - }, - { - ReservedInstancesOfferingId: aws.String("off-m6i"), - InstanceType: "m6i.large", - Duration: aws.Int64(OneYearSeconds), - FixedPrice: aws.Float32(0), - UsagePrice: aws.Float32(0.03), - }, - { - ReservedInstancesOfferingId: aws.String("off-m7g"), - InstanceType: "m7g.large", - Duration: aws.Int64(OneYearSeconds), - FixedPrice: aws.Float32(0), - UsagePrice: aws.Float32(0.02), - }, - }, - }, nil) - - got, err := client.FindConvertibleOfferings(context.Background(), - []string{"m5.large", "m6i.large", "m7g.large"}) - assert.NoError(t, err) - if len(got) != 3 { - t.Fatalf("expected 3 offerings (one per instance type, expensive m5 dropped); got %d", len(got)) - } - - // Ascending by monthly cost. - assert.Equal(t, "m7g.large", got[0].InstanceType) - assert.Equal(t, "off-m7g", got[0].OfferingID) - assert.InDelta(t, 14.60, got[0].EffectiveMonthlyCost, 0.01) - - assert.Equal(t, "m6i.large", got[1].InstanceType) - assert.InDelta(t, 21.90, got[1].EffectiveMonthlyCost, 0.01) - - assert.Equal(t, "m5.large", got[2].InstanceType) - assert.Equal(t, "off-m5-cheap", got[2].OfferingID, "cheaper m5.large variant must win over the expensive one") - assert.InDelta(t, 29.20, got[2].EffectiveMonthlyCost, 0.01) - - // Single batched API call, not one per instance type. - mockEC2.AssertNumberOfCalls(t, "DescribeReservedInstancesOfferings", 1) -} - -// TestFindConvertibleOfferings_EmptyInputMakesNoCall guards against -// fan-out when the caller passes an empty list (shouldn't happen in -// practice but the helper is library-public). -func TestFindConvertibleOfferings_EmptyInputMakesNoCall(t *testing.T) { - t.Parallel() - mockEC2 := new(MockEC2Client) - client := &Client{client: mockEC2} - got, err := client.FindConvertibleOfferings(context.Background(), nil) - assert.NoError(t, err) - assert.Nil(t, got) - mockEC2.AssertNotCalled(t, "DescribeReservedInstancesOfferings", mock.Anything, mock.Anything) -}