diff --git a/README.md b/README.md index 51e69d02..f186925a 100644 --- a/README.md +++ b/README.md @@ -114,10 +114,22 @@ go install github.com/LeanerCloud/CUDly/cmd@latest |------|-------------|---------| | `-p, --payment` | Payment option: `all-upfront`, `partial-upfront`, `no-upfront` | no-upfront | | `-t, --term` | Term in years: `1` or `3` | 3 | -| `-c, --coverage` | Coverage percentage (0-100) | 80 | +| `-c, --coverage` | Coverage percentage (0-100) — % of each recommendation's instance count to purchase | 80 | +| `-u, --target-coverage` | Target % (0-100) of historical demand to cover with commitments; the rest spills to on-demand. Sizes counts so projected coverage approximates target, projected utilization stays near 100%. Overrides `--coverage`. | 0 (disabled) | | `--max-instances` | Maximum instances to purchase (0 = unlimited) | 0 | | `--override-count` | Override recommended count with specific value | 0 | +> **`--coverage` vs `--target-coverage`**: two related but distinct +> sizing levers. `--coverage` scales each AWS recommendation's instance +> count by a fixed fraction (`rec.Count * coverage/100`). +> `--target-coverage` sizes against historical average hourly usage +> instead (`floor(avg * target/100)`), so the resulting count reflects +> real demand rather than AWS's recommended count. Both lean the same +> direction (higher value = more RIs, lower value = fewer), but +> `--target-coverage` is the right lever when the historical-usage +> signal is what you want to size by and you're explicitly leaving +> on-demand headroom for growth or bursts. + ### Execution Control | Flag | Description | Default | diff --git a/cmd/helpers.go b/cmd/helpers.go index 8818fffc..41105f2b 100644 --- a/cmd/helpers.go +++ b/cmd/helpers.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "log" + "math" "os" "strings" "sync" @@ -112,7 +113,14 @@ func CalculateTotalInstances(recs []common.Recommendation) int { return total } -// ApplyCoverage applies coverage percentage to recommendations +// ApplyCoverage applies coverage percentage to recommendations. +// +// All cost-bearing fields (CommitmentCost, OnDemandCost, EstimatedSavings, +// and for SPs the SavingsPlanDetails.HourlyCommitment) scale by coverage/100 +// so the returned Recommendation represents the sized purchase rather than +// AWS's pre-sized proposal. SavingsPercentage is invariant (savings vs +// on-demand ratio) and stays unscaled. Pre-sizing values can still be +// recovered: RecommendedCount holds AWS's pre-sized count for RIs. func ApplyCoverage(recs []common.Recommendation, coverage float64) []common.Recommendation { if coverage >= 100 { return recs @@ -121,7 +129,7 @@ func ApplyCoverage(recs []common.Recommendation, coverage float64) []common.Reco return []common.Recommendation{} } - // Apply coverage by reducing counts (for RIs) or hourly commitment (for Savings Plans) + ratio := coverage / 100.0 result := make([]common.Recommendation, 0, len(recs)) for _, rec := range recs { adjusted := rec @@ -135,10 +143,9 @@ func ApplyCoverage(recs []common.Recommendation, coverage float64) []common.Reco if common.IsSavingsPlan(rec.Service) { if details, ok := rec.Details.(*common.SavingsPlanDetails); ok { newDetails := *details // Copy the struct - newDetails.HourlyCommitment = newDetails.HourlyCommitment * coverage / 100 + newDetails.HourlyCommitment = newDetails.HourlyCommitment * ratio + adjusted = common.ScaleRecommendationCosts(adjusted, ratio) adjusted.Details = &newDetails - // Also adjust the estimated savings proportionally - adjusted.EstimatedSavings = rec.EstimatedSavings * coverage / 100 } else { AppLogger.Printf("WARNING: SP recommendation for service %q has unexpected Details type %T; passing through unscaled\n", rec.Service, rec.Details) } @@ -146,9 +153,19 @@ func ApplyCoverage(recs []common.Recommendation, coverage float64) []common.Reco continue } - // For RIs, reduce the count - newCount := int(float64(rec.Count) * coverage / 100) + // For RIs, reduce the count and scale cost-bearing fields by the + // DISCRETE count ratio (newCount / rec.Count) rather than the + // requested ratio. Truncating newCount to an int then multiplying + // costs by the unrounded ratio desynchronises Count and costs: + // e.g. rec.Count=3 + ratio=0.5 yields newCount=1 (33% of instances) + // but costs would scale to 50%, overstating the sized purchase + // price by ~50%. Mirrors ApplyTargetCoverage / family-NU sizing. + // rec.Count is guaranteed > 0 here because newCount > 0 implies + // rec.Count >= 1 (int(0 * ratio) is 0 for any ratio). + newCount := int(float64(rec.Count) * ratio) if newCount > 0 { + sizedRatio := float64(newCount) / float64(rec.Count) + adjusted = common.ScaleRecommendationCosts(adjusted, sizedRatio) adjusted.Count = newCount result = append(result, adjusted) } @@ -156,6 +173,300 @@ func ApplyCoverage(recs []common.Recommendation, coverage float64) []common.Reco return result } +// ApplyTargetCoverage sizes RI/SP recommendations so that projected +// post-purchase COVERAGE lands near targetPct, leaving (100-targetPct)% of +// historical demand on-demand as headroom. See ApplyCoverage for the simpler +// rec.Count-scaled coverage flag; the two are dispatched via applySizing. +// +// AWS's recommendation count is sized for ~100% coverage of historical demand +// (average instances used per hour). --target-coverage is the lever the +// operator uses to deliberately under-buy that baseline, accepting more +// on-demand spend in exchange for less idle commitment when demand is bursty +// or trending down. +// +// The flag name says "utilization" because the original framing (issue #338) +// was a utilization floor. In practice operators set values like 70 or 80 +// expecting coverage near that figure (with utilization staying ~100% on the +// commitments actually purchased), not the over-buy semantics that floor +// produces; see the #338 review discussion for the redirect. +// +// RIs (existing-aware, per-pool, strict-target): +// +// gap = targetPct - ExistingCoveragePct (percentage points) +// remaining_gap = 100 - ExistingCoveragePct (percentage points) +// n_target = floor(rec.Count * gap / remaining_gap) +// +// The formula scales AWS's per-account-incremental rec.Count by the +// fraction of the current-to-100% gap we want to fill. For example +// with existing=50% and target=80%: gap=30, remaining_gap=50, so we +// buy 30/50 = 60% of AWS's rec.Count. Anchoring to rec.Count (which +// AWS computed per-linked-account) is more robust in multi-account +// orgs than scaling against avg, since CE's ExistingCoveragePct is +// org-wide averaged and mixes accounts together. +// +// If gap <= 0 (existing already at/above target) → drop with INFO log. +// If n_target == 0 (gap too small to fit one RI) → drop with INFO log. +// If AverageInstancesUsedPerHour <= 0 → pass through (no signal); counted +// in the per-run skip summary. +// Projected coverage = ExistingCoveragePct + n_target/avg * 100 (total +// coverage after the purchase, clamped to 100). Projected utilization = +// avg/n_target * 100 clamped to 100. +// +// ExistingCoveragePct is sourced from CE GetReservationCoverage in the +// same pool; zero means "no signal" and the formula reduces to +// floor(rec.Count * target/100) — i.e. plain target% of AWS's count. +// For RDS the coverage lookup keys by (region, instance_type, engine). +// Floor (rather than ceil or round) gives strict "at-most-target" +// sizing. Pools too small to approximate the target meaningfully +// should be filtered upstream via --min-pool-size; floor will drop +// them as zero-count otherwise. +// +// Pools where CE reports 100% existing coverage but AWS still recommends +// new RIs (typical when existing RIs are near expiry) are dropped here — +// the existing coverage is honoured strictly. Use --rebuy-window-days to +// surface those replacements before the cliff. +// +// SPs: +// +// Scale SavingsPlanDetails.HourlyCommitment and EstimatedSavings by +// targetPct/100 (the same lever ApplyCoverage's SP branch uses, but with +// the explicit utilization-target framing). RecommendedUtilization is used +// only as the no-signal guard: when AWS hasn't returned a projected +// utilization figure, we pass the rec through unchanged and count it in +// the skip summary, since we can't sanity-check what the scaled commitment +// would mean. +// If RecommendedUtilization <= 0 → pass through; counted in skip summary. +// +// Recs of any other CommitmentType are passed through unmodified (warned +// once per type per run). +func ApplyTargetCoverage(recs []common.Recommendation, targetPct float64) []common.Recommendation { + if targetPct <= 0 || targetPct > 100 { + // Validation ensures we never get here in production, but be defensive + // so a buggy caller doesn't divide by zero. + AppLogger.Printf("WARNING: ApplyTargetCoverage called with targetPct=%.2f outside (0,100]; returning recs unchanged\n", targetPct) + return recs + } + + result := make([]common.Recommendation, 0, len(recs)) + var skipped int + unsupportedSeen := make(map[common.CommitmentType]bool) + + for _, rec := range recs { + adjusted, kept, missingSignal := applyTargetCoverageOne(rec, targetPct, unsupportedSeen) + if missingSignal { + skipped++ + } + if kept { + result = append(result, adjusted) + } + } + + if skipped > 0 { + AppLogger.Printf("INFO: --target-coverage=%.1f%% skipped %d of %d recommendations with no utilization signal (passed through unchanged)\n", + targetPct, skipped, len(recs)) + } + + return result +} + +// applyTargetCoverageOne dispatches a single recommendation through the +// appropriate branch. Returns (rec, kept, missingSignal): +// - kept=true → caller appends `rec` (the adjusted or pass-through value). +// - kept=false → caller drops the rec (only the RI "target unreachable" +// branch returns this; an INFO log already fired). +// - missingSignal=true → counted toward the end-of-run skip summary. +// +// Split out of ApplyTargetCoverage to keep that function under gocyclo's +// complexity threshold. +func applyTargetCoverageOne(rec common.Recommendation, targetPct float64, unsupportedSeen map[common.CommitmentType]bool) (common.Recommendation, bool, bool) { + switch { + case common.IsSavingsPlan(rec.Service): + adjusted, ok := applyTargetCoverageSP(rec, targetPct) + if !ok { + // SP no-signal: pass through unchanged. + return rec, true, true + } + return adjusted, true, false + case rec.CommitmentType == common.CommitmentReservedInstance: + adjusted, ok := applyTargetCoverageRI(rec, targetPct) + if !ok { + // Distinguish "no signal" (pass through, count in summary) from + // "target unreachable" (drop with already-fired INFO log). + if rec.AverageInstancesUsedPerHour <= 0 { + return rec, true, true + } + return rec, false, false + } + return adjusted, true, false + default: + if !unsupportedSeen[rec.CommitmentType] { + AppLogger.Printf("WARNING: --target-coverage not supported for CommitmentType=%q; passing recommendations through unchanged\n", rec.CommitmentType) + unsupportedSeen[rec.CommitmentType] = true + } + return rec, true, false + } +} + +// applyTargetCoverageRI is the RI branch of ApplyTargetCoverage. Returns +// (adjusted, true) on success, (rec, false) when the rec should be passed +// through unscaled (no signal) or dropped (target unreachable). Caller +// distinguishes the two via rec.AverageInstancesUsedPerHour. +func applyTargetCoverageRI(rec common.Recommendation, targetPct float64) (common.Recommendation, bool) { + if rec.AverageInstancesUsedPerHour <= 0 { + // No signal — caller will pass through and count in the summary. + return rec, false + } + + avg := rec.AverageInstancesUsedPerHour + // Coverage-anchored under-buy: size linearly off the pool's avg demand + // and the absolute gap to target. Both inputs come from + // GetReservationCoverage (AvgInstancesPerHour from + // TotalRunningHours/window; ExistingCoveragePct from + // CoverageHoursPercentage) so the buy lines up with the AWS console's + // reservations-coverage report: target%-existing% of avg instances. + // + // The previous formula anchored on AWS's rec.Count + // (floor(rec.Count × gap / (100−existing))), which under-bought when + // AWS sized rec.Count for less than full coverage (ROI-curated) and + // when CE's org-wide existing% disagreed with rec.Count's per-account + // derivation. Anchoring on coverage's own avg removes both mismatches. + // rec.Count is retained only for the cost-scaling ratio further down. + // + // Keep the subtraction in percentage units (subtract first, divide + // later) so whole-percent values don't lose precision to float + // rounding at integer boundaries. + gapPct := targetPct - rec.ExistingCoveragePct + if gapPct <= 0 { + // Existing commitments already meet or exceed the target; no purchase + // needed in this pool. Drop with an info log so operators can see what + // the flag did. Returning (_, false) with avg > 0 signals "drop, don't + // pass through". + AppLogger.Printf("INFO: --target-coverage=%.1f%% already met by existing coverage %.1f%% for %s/%s/%s; dropped recommendation\n", + targetPct, rec.ExistingCoveragePct, rec.Service, rec.Region, rec.ResourceType) + return rec, false + } + // Floor so we never over-shoot the target on integer-arithmetic edges. + // Strict-target semantics: 80% means "at most 80% coverage", not "at + // least 80%". Floor under-covers small/odd pools (e.g. avg=2, target=80 + // gives 1 RI = 50% rather than 2 RIs = 100%); pools too small to + // approximate target are best filtered out via --min-pool-size upstream. + nTarget := int(math.Floor(avg * gapPct / 100.0)) + + if nTarget == 0 { + // Floor produces zero when avg × gap% < 100 (small pools or thin + // gaps). Drop — buying 1 RI would over-shoot target and the + // strict-target intent prefers under-cover (run on-demand) over + // over-cover (idle commitment). Use --min-pool-size to filter + // these out earlier so they don't show up as drops in the log. + AppLogger.Printf("INFO: --target-coverage=%.1f%% sizes %s/%s/%s to 0 instances (avg=%.2f, gap=%.2f%% produces <1 RI); dropped recommendation\n", + targetPct, rec.Service, rec.Region, rec.ResourceType, avg, gapPct) + // Returning (_, false) with avg > 0 signals "drop, don't pass through". + // applyTargetCoverageRI's caller branches on + // rec.AverageInstancesUsedPerHour to distinguish drop vs no-signal. + return rec, false + } + + // Cost-bearing fields scale by the ratio of sized-to-original count, so the + // returned rec represents the sized purchase rather than AWS's pre-sized + // proposal. SavingsPercentage is invariant (savings vs on-demand ratio). + // rec.Count is the AWS pre-sizing count at this point (parser sets Count + // == RecommendedCount and we haven't mutated either yet). When the + // coverage-anchored nTarget exceeds rec.Count (AWS sized below full + // coverage), the ratio scales costs up linearly — accurate when per-RI + // pricing is constant, which it is within a single pool/term/payment + // combination. Guarded against rec.Count==0 (malformed rec) by falling + // back to nTarget so a zero-cost rec stays zero-cost rather than NaN. + var ratio float64 + if rec.Count > 0 { + ratio = float64(nTarget) / float64(rec.Count) + } else { + ratio = float64(nTarget) + } + adjusted := common.ScaleRecommendationCosts(rec, ratio) + adjusted.Count = nTarget + + // Projection metrics. ProjectedCoverage is TOTAL coverage (existing + + // new) so operators can see the figure they actually targeted. + // ProjectedUtilization stays at the per-purchase fill rate; under-buy + // keeps nTarget <= avg so it always clamps to 100%. + projUtil := avg / float64(nTarget) * 100.0 + if projUtil > 100 { + projUtil = 100 + } + projCov := rec.ExistingCoveragePct + float64(nTarget)/avg*100.0 + if projCov > 100 { + projCov = 100 + } + adjusted.ProjectedUtilization = projUtil + adjusted.ProjectedCoverage = projCov + return adjusted, true +} + +// applyTargetCoverageSP is the SP branch of ApplyTargetCoverage. Returns +// (adjusted, true) when the rec is kept, (rec, false) when it should be +// skipped (caller passes through unscaled and counts in the skip summary). +func applyTargetCoverageSP(rec common.Recommendation, targetPct float64) (common.Recommendation, bool) { + if rec.RecommendedUtilization <= 0 { + return rec, false + } + // Also treat a $0 HourlyCommitment as "no signal" — CE occasionally + // returns placeholder recs with zero commitment. Sizing such a rec + // would produce nonsense ($0 commitment * ratio = $0) while still + // claiming the target coverage is achieved, which is incoherent. + // Pass through unchanged and count in the skip summary. + if details, ok := rec.Details.(*common.SavingsPlanDetails); ok && details.HourlyCommitment <= 0 { + return rec, false + } + + // Under-buy: scale all cost-bearing fields by target/100 against AWS's + // recommended commitment. This deliberately spends less than AWS suggested, + // leaving (100-target)% of the SP's projected workload on on-demand. + // RecommendedUtilization is consulted only as a no-signal guard above (a + // zero value means we can't sanity-check the result); the scaling itself + // uses targetPct directly rather than a recUtil/target ratio so the flag's + // intent is honoured even when AWS already projects above target. + // + // If Details isn't a *SavingsPlanDetails (defensive — should always be + // for SP recs), log a warning and pass through UNCHANGED — including + // leaving ProjectedUtilization at zero. Setting projection fields on a + // rec whose commitment fields couldn't be scaled would produce a + // misleading row (projection=target%, savings=full-unscaled). + details, ok := rec.Details.(*common.SavingsPlanDetails) + if !ok { + AppLogger.Printf("WARNING: SP recommendation for service %q has unexpected Details type %T; passing through unscaled\n", rec.Service, rec.Details) + return rec, true + } + ratio := targetPct / 100.0 + newDetails := *details // copy + newDetails.HourlyCommitment = newDetails.HourlyCommitment * ratio + adjusted := common.ScaleRecommendationCosts(rec, ratio) + adjusted.Details = &newDetails + // Shrinking commitment raises projected utilization by 1/ratio + // (used is fixed = orig_commit * RecUtil, bought is orig_commit * ratio). + // Clamp to 100 since utilization caps at full use. + projUtil := rec.RecommendedUtilization / ratio + if projUtil > 100 { + projUtil = 100 + } + adjusted.ProjectedUtilization = projUtil + // ProjectedCoverage stays zero for SPs — CE doesn't expose total-demand-$ + // for a clean coverage figure (see field doc on Recommendation). + return adjusted, true +} + +// applySizing chooses target-coverage or coverage sizing. +// +// coverage is the effective % to apply when target-coverage is unset +// (the main path passes cfg.Coverage; the CSV path passes csvModeCoverage, +// which substitutes the default 80% with 100% so CSV-driven counts aren't +// silently dropped). +func applySizing(recs []common.Recommendation, cfg Config, coverage float64) []common.Recommendation { + if cfg.TargetCoverage > 0 { + return ApplyTargetCoverage(recs, cfg.TargetCoverage) + } + return ApplyCoverage(recs, coverage) +} + // ApplyCountOverride overrides the count for all recommendations func ApplyCountOverride(recs []common.Recommendation, overrideCount int32) []common.Recommendation { if overrideCount <= 0 { diff --git a/cmd/helpers_test.go b/cmd/helpers_test.go index e43a1f1e..afbc0084 100644 --- a/cmd/helpers_test.go +++ b/cmd/helpers_test.go @@ -344,6 +344,38 @@ func TestApplyCoverage(t *testing.T) { } } +// TestApplyCoverage_RICostScaling locks the fix for the CR finding on +// helpers.go:159-166: cost-bearing fields must scale by the DISCRETE +// count ratio (newCount / rec.Count), not the raw coverage ratio. With +// rec.Count=3 and coverage=50%, newCount=int(1.5)=1 (33% of instances) +// so costs must drop to 33% of original, not 50%, otherwise the sized +// purchase reads ~50% more expensive than what was actually bought. +func TestApplyCoverage_RICostScaling(t *testing.T) { + monthly := 60.0 + recs := []common.Recommendation{ + { + Service: common.ServiceEC2, + CommitmentType: common.CommitmentReservedInstance, + Count: 3, + CommitmentCost: 900, + OnDemandCost: 1800, + EstimatedSavings: 300, + RecurringMonthlyCost: &monthly, + }, + } + out := ApplyCoverage(recs, 50.0) + require.Len(t, out, 1) + // newCount = int(3 * 0.5) = 1. sizedRatio = 1/3. + assert.Equal(t, 1, out[0].Count) + assert.InDelta(t, 300.0, out[0].CommitmentCost, 0.01, "CommitmentCost scales by 1/3 (newCount/rec.Count), NOT 0.5 (raw ratio)") + assert.InDelta(t, 600.0, out[0].OnDemandCost, 0.01) + assert.InDelta(t, 100.0, out[0].EstimatedSavings, 0.01) + require.NotNil(t, out[0].RecurringMonthlyCost) + assert.InDelta(t, 20.0, *out[0].RecurringMonthlyCost, 0.01, "RecurringMonthlyCost scales by sized ratio too") + // Original pointer not mutated. + assert.Equal(t, 60.0, monthly) +} + func TestAdjustRecommendationsForExisting(t *testing.T) { ctx := context.Background() @@ -898,3 +930,453 @@ func TestAdjustRecommendationsForExisting_PartialCoverage(t *testing.T) { assert.Empty(t, filtered) // partial coverage stays in passed, not filtered mockClient.AssertExpectations(t) } + +// TestApplyTargetCoverage covers the RI sizing branch of issue #338's +// --target-coverage flag, now under-buy semantics: n = floor(avg*target). +// Confirms: floor (not ceil) selection so coverage stays at-most target, +// drop-when-target-too-low (avg*target < 1), no-signal pass-through, and +// projected utilization (typically 100% since we under-buy) / coverage +// (tracks target%) outputs. +func TestApplyTargetCoverage_RI(t *testing.T) { + mkRI := func(count int, avg, recUtil float64) common.Recommendation { + return common.Recommendation{ + Service: common.ServiceEC2, + Region: "us-east-1", + ResourceType: "t3.medium", + Count: count, + CommitmentType: common.CommitmentReservedInstance, + CommitmentCost: 1000, + OnDemandCost: 2000, + EstimatedSavings: 500, + AverageInstancesUsedPerHour: avg, + RecommendedUtilization: recUtil, + } + } + + tests := []struct { + name string + rec common.Recommendation + target float64 + wantDropped bool + wantCount int + wantProjUtil float64 // 0 means "don't assert" + wantProjCovGTE float64 // we assert coverage >= this (handles the float clamping) + }{ + { + // avg=8.5, target=95%, existing=0%. + // gap=95. n = floor(8.5 * 95/100) = floor(8.075) = 8. + // Projected util = 8.5/8 = 106.25 → clamped to 100. + // Projected cov = 0 + 8/8.5*100 = 94.117…% + name: "RI: target 95 buys 8 (floor of avg*0.95)", + rec: mkRI(10, 8.5, 0), + target: 95, + wantCount: 8, + wantProjUtil: 100, + wantProjCovGTE: 94.0, + }, + { + // avg=10, target=50%, existing=0%. n=floor(10*0.5)=5. + // Projected cov = 5/10*100 = 50.0%. + name: "RI: target 50 buys half of avg demand", + rec: mkRI(10, 10, 0), + target: 50, + wantCount: 5, + wantProjUtil: 100, // 10/5 clamped + wantProjCovGTE: 50.0, + }, + { + // avg=0.4, target=50%, existing=0%. + // n = floor(0.4 * 50/100) = floor(0.2) = 0 → DROPPED. + // Tiny pools where avg×gap%<100 produce 0 RIs under the + // coverage-anchored formula. --min-pool-size upstream is the + // intended filter for these; the drop here is the fallback + // when the upstream filter wasn't applied. + name: "RI: tiny avg below 1-RI threshold drops", + rec: mkRI(5, 0.4, 0), + target: 50, + wantDropped: true, + }, + { + // avg=0 (no signal) → passed through unchanged, counted in skip summary. + // Projection metrics never set on the pass-through path. + name: "RI: no signal → passed through unmodified", + rec: mkRI(5, 0, 0), + target: 80, + wantCount: 5, + wantProjUtil: 0, // never set in pass-through + }, + { + // avg=4, target=80%, existing=0%. n=floor(4*0.8)=3. + // Projected util = 4/3 = 133% clamped to 100. + // Projected cov = 3/4*100 = 75.0%. + name: "RI: target 80 buys floor(avg*0.8)", + rec: mkRI(5, 4, 0), + target: 80, + wantCount: 3, + wantProjUtil: 100, + wantProjCovGTE: 75.0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + recs := []common.Recommendation{tt.rec} + out := ApplyTargetCoverage(recs, tt.target) + if tt.wantDropped { + if len(out) != 0 { + t.Fatalf("expected drop; got %d recs", len(out)) + } + return + } + if len(out) != 1 { + t.Fatalf("expected 1 rec; got %d", len(out)) + } + if out[0].Count != tt.wantCount { + t.Errorf("Count: got %d, want %d", out[0].Count, tt.wantCount) + } + if tt.wantProjUtil > 0 { + if math.Abs(out[0].ProjectedUtilization-tt.wantProjUtil) > 0.01 { + t.Errorf("ProjectedUtilization: got %.4f, want %.4f", + out[0].ProjectedUtilization, tt.wantProjUtil) + } + } + // Zero means "don't assert" (matches the wantProjUtil convention) + // since the pass-through path leaves ProjectedCoverage at zero. + if tt.wantProjCovGTE > 0 { + if out[0].ProjectedCoverage < tt.wantProjCovGTE-0.01 { + t.Errorf("ProjectedCoverage: got %.4f, want >= %.4f", + out[0].ProjectedCoverage, tt.wantProjCovGTE) + } + } + }) + } +} + +// TestApplyTargetCoverage_RI_CostScaling verifies RI cost-bearing fields +// scale by the sized-to-original count ratio. SavingsPercentage is invariant. +// The scaled values let downstream consumers (CSV writer, reporter, audit +// log) trust rec.CommitmentCost / rec.EstimatedSavings as the sized purchase +// rather than AWS's pre-sized proposal. +func TestApplyTargetCoverage_RI_CostScaling(t *testing.T) { + rec := common.Recommendation{ + Service: common.ServiceEC2, + Count: 10, + CommitmentType: common.CommitmentReservedInstance, + CommitmentCost: 1000, + OnDemandCost: 2000, + EstimatedSavings: 500, + SavingsPercentage: 25, + AverageInstancesUsedPerHour: 8, + } + // target=80, existing=0, avg=8 → gap=80. + // n = floor(8 * 80/100) = 6. Ratio = 6/10 = 0.6 (cost scaling still + // uses rec.Count to convert AWS's quoted cost-for-rec.Count into + // cost-for-nTarget). + out := ApplyTargetCoverage([]common.Recommendation{rec}, 80) + require.Len(t, out, 1) + assert.Equal(t, 6, out[0].Count) + assert.InDelta(t, 600.0, out[0].CommitmentCost, 0.001, "CommitmentCost scales by nTarget/rec.Count") + assert.InDelta(t, 1200.0, out[0].OnDemandCost, 0.001, "OnDemandCost scales by nTarget/rec.Count") + assert.InDelta(t, 300.0, out[0].EstimatedSavings, 0.001, "EstimatedSavings scales by nTarget/rec.Count") + assert.Equal(t, 25.0, out[0].SavingsPercentage, "SavingsPercentage is invariant under count scaling") + + t.Run("RecurringMonthlyCost scales by ratio when populated", func(t *testing.T) { + // AWS populated RecurringStandardMonthlyCost for partial/no-upfront + // recs; sized purchase must scale this monthly fee by the same + // nTarget/rec.Count ratio so total cost (upfront + monthly × term) + // reflects what the user actually buys. + monthly := 50.0 + recWithMonthly := rec + recWithMonthly.RecurringMonthlyCost = &monthly + out := ApplyTargetCoverage([]common.Recommendation{recWithMonthly}, 80) + require.Len(t, out, 1) + require.NotNil(t, out[0].RecurringMonthlyCost, "scaled pointer should be non-nil") + assert.InDelta(t, 30.0, *out[0].RecurringMonthlyCost, 0.001, "monthly cost scales by 6/10") + // Original pointer untouched (we allocated a new one). + assert.Equal(t, 50.0, monthly, "original RecurringMonthlyCost target should not be mutated") + }) + + t.Run("RecurringMonthlyCost stays nil when not populated", func(t *testing.T) { + // AWS API didn't return RecurringStandardMonthlyCost (all-upfront, + // or field missing). The sized rec should also have nil so + // downstream renders "unknown" rather than zero. + out := ApplyTargetCoverage([]common.Recommendation{rec}, 80) + require.Len(t, out, 1) + assert.Nil(t, out[0].RecurringMonthlyCost, "nil input → nil output") + }) +} + +// TestApplyTargetCoverage_RI_ExistingCoverage covers the under-buy formula's +// existing-commitment branch: gap = (target - existing_cov)/100, then +// n_target = floor(avg * gap). Matches the worked example from the #338 design +// thread (20 instances, 10 existing RIs at 50% coverage, target 80% → buy 6). +func TestApplyTargetCoverage_RI_ExistingCoverage(t *testing.T) { + mkRI := func(count int, avg, existingCov float64) common.Recommendation { + return common.Recommendation{ + Service: common.ServiceEC2, + Region: "us-east-1", + ResourceType: "t3.medium", + Count: count, + RecommendedCount: count, + CommitmentType: common.CommitmentReservedInstance, + CommitmentCost: 1000, + OnDemandCost: 2000, + EstimatedSavings: 500, + AverageInstancesUsedPerHour: avg, + ExistingCoveragePct: existingCov, + } + } + + tests := []struct { + name string + rec common.Recommendation + target float64 + wantDropped bool + wantCount int + wantTotalCov float64 // ProjectedCoverage = existing + new contribution + }{ + { + // User's worked example: avg=20, existing=50%, target=80%. + // gap=30. n=ceil(20*0.30)=6. Total cov = 50 + 6/20*100 = 80. + name: "User example: 50% existing, 80% target on avg=20 → buy 6", + rec: mkRI(10, 20, 50), + target: 80, + wantCount: 6, + wantTotalCov: 80, + }, + { + // existing=0%, target=70%, avg=10. gap=70. n=ceil(7)=7. + name: "Zero existing: ceil(avg*target/100)", + rec: mkRI(10, 10, 0), + target: 70, + wantCount: 7, + wantTotalCov: 70, + }, + { + // existing=80% meets target=80% → drop. + name: "Existing meets target exactly: drop", + rec: mkRI(10, 10, 80), + target: 80, + wantDropped: true, + }, + { + // existing=95% exceeds target=80% → drop. + name: "Existing exceeds target: drop", + rec: mkRI(10, 10, 95), + target: 80, + wantDropped: true, + }, + { + // avg=2, existing=70%, target=80%. gap=10. + // n = floor(2 * 10/100) = floor(0.2) = 0 → DROPPED. + // Small pool + thin gap: no integer buy can approximate the + // target. --min-pool-size upstream is the intended filter. + name: "Small gap on tiny avg drops", + rec: mkRI(5, 2, 70), + target: 80, + wantDropped: true, + }, + { + // avg=10, existing=60%, target=70%. gap=10. + // n = floor(10 * 10/100) = 1. Total cov = 60 + 1/10*100 = 70. + // Coverage-anchored: 1 RI exactly closes the 10-point gap + // because avg=10 and each RI is worth 10% of avg demand. + name: "Small top-up: 1 RI exactly closes 10-pt gap on avg=10", + rec: mkRI(10, 10, 60), + target: 70, + wantCount: 1, + wantTotalCov: 70, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + out := ApplyTargetCoverage([]common.Recommendation{tt.rec}, tt.target) + if tt.wantDropped { + assert.Len(t, out, 0, "expected drop") + return + } + require.Len(t, out, 1) + assert.Equal(t, tt.wantCount, out[0].Count, "Count") + assert.InDelta(t, tt.wantTotalCov, out[0].ProjectedCoverage, 0.01, "ProjectedCoverage is TOTAL (existing + new)") + }) + } +} + +// TestApplyTargetCoverage_SP covers the SP sizing branch under under-buy +// semantics: HourlyCommitment and EstimatedSavings scale by targetPct/100 +// regardless of AWS's projected utilization. CommitmentCost / OnDemandCost / +// SavingsPercentage must NOT change. +func TestApplyTargetCoverage_SP(t *testing.T) { + mkSP := func(recUtil float64) common.Recommendation { + return common.Recommendation{ + Service: common.ServiceSavingsPlansCompute, + CommitmentType: common.CommitmentSavingsPlan, + CommitmentCost: 1000, + OnDemandCost: 5000, + EstimatedSavings: 1500, + SavingsPercentage: 30, + RecommendedUtilization: recUtil, + Details: &common.SavingsPlanDetails{HourlyCommitment: 2.0}, + } + } + + t.Run("AWS above target — still scales by target (under-buy)", func(t *testing.T) { + // RecUtil=95, target=80. Even though AWS projects above target, the + // flag's intent is "leave 20% headroom", so the commitment shrinks + // to 80% of AWS rec. All cost-bearing fields scale by 0.8. + // Projected util = 95/0.80 = 118.75 clamped to 100. + out := ApplyTargetCoverage([]common.Recommendation{mkSP(95)}, 80) + require.Len(t, out, 1) + assert.InDelta(t, 1.6, out[0].Details.(*common.SavingsPlanDetails).HourlyCommitment, 0.001) + assert.InDelta(t, 800.0, out[0].CommitmentCost, 0.001, "CommitmentCost scales by target/100") + assert.InDelta(t, 4000.0, out[0].OnDemandCost, 0.001, "OnDemandCost scales by target/100") + assert.InDelta(t, 1200.0, out[0].EstimatedSavings, 0.001) + assert.Equal(t, 30.0, out[0].SavingsPercentage, "SavingsPercentage is invariant") + assert.InDelta(t, 100.0, out[0].ProjectedUtilization, 0.001, "RecUtil/ratio = 95/0.80 = 118.75 clamps to 100") + assert.Equal(t, 0.0, out[0].ProjectedCoverage, "SPs intentionally leave ProjectedCoverage at zero") + }) + + t.Run("AWS below target — scale down by target (under-buy)", func(t *testing.T) { + // RecUtil=50, target=80. All cost-bearing fields shrink to 80%. + // Projected util = 50/0.80 = 62.5 (no clamp needed). + out := ApplyTargetCoverage([]common.Recommendation{mkSP(50)}, 80) + require.Len(t, out, 1) + details := out[0].Details.(*common.SavingsPlanDetails) + assert.InDelta(t, 1.6, details.HourlyCommitment, 0.001) + assert.InDelta(t, 800.0, out[0].CommitmentCost, 0.001, "CommitmentCost scales by target/100") + assert.InDelta(t, 4000.0, out[0].OnDemandCost, 0.001, "OnDemandCost scales by target/100") + assert.InDelta(t, 1200.0, out[0].EstimatedSavings, 0.001) + assert.Equal(t, 30.0, out[0].SavingsPercentage, "SavingsPercentage is invariant") + assert.InDelta(t, 62.5, out[0].ProjectedUtilization, 0.001, "RecUtil/ratio = 50/0.80 = 62.5") + assert.Equal(t, 0.0, out[0].ProjectedCoverage) + }) + + t.Run("no signal → passed through unchanged", func(t *testing.T) { + out := ApplyTargetCoverage([]common.Recommendation{mkSP(0)}, 80) + require.Len(t, out, 1) + // Original recommendation values intact. + assert.Equal(t, 2.0, out[0].Details.(*common.SavingsPlanDetails).HourlyCommitment) + assert.Equal(t, 1500.0, out[0].EstimatedSavings) + assert.Equal(t, 0.0, out[0].ProjectedUtilization) + }) +} + +// TestApplySizing checks the routing helper picks the right sizer based +// on cfg.TargetCoverage being >0 vs ==0. +func TestApplySizing(t *testing.T) { + ri := common.Recommendation{ + Service: common.ServiceEC2, + Count: 10, + CommitmentType: common.CommitmentReservedInstance, + AverageInstancesUsedPerHour: 8, + } + + t.Run("TargetCoverage > 0 → ApplyTargetCoverage", func(t *testing.T) { + cfg := Config{TargetCoverage: 80, Coverage: 100} + out := applySizing([]common.Recommendation{ri}, cfg, cfg.Coverage) + require.Len(t, out, 1) + // avg=8, target=80%, existing=0%. gap=80. + // n = floor(8 * 80/100) = floor(6.4) = 6. ProjUtil = 8/6 = 133% → 100. + assert.Equal(t, 6, out[0].Count) + assert.Equal(t, 100.0, out[0].ProjectedUtilization) + }) + + t.Run("TargetCoverage == 0 → ApplyCoverage", func(t *testing.T) { + cfg := Config{TargetCoverage: 0, Coverage: 50} + out := applySizing([]common.Recommendation{ri}, cfg, cfg.Coverage) + require.Len(t, out, 1) + // ApplyCoverage(50) on count=10 → 5. ProjectedUtilization NOT set + // (zero) because we took the coverage branch. + assert.Equal(t, 5, out[0].Count) + assert.Equal(t, 0.0, out[0].ProjectedUtilization) + }) +} + +// TestApplyTargetCoverage_RI_Target100 covers the target == 100 boundary. +// With the coverage-anchored formula, target=100 (existing=0) yields +// n = floor(avg * 100/100) = floor(avg) — operators get a buy sized to +// the pool's average concurrent demand, not AWS's rec.Count (which may +// be sized to peak/ROI-curated). Pools where avg<1 drop; --min-pool-size +// is the intended upstream filter. +func TestApplyTargetCoverage_RI_Target100(t *testing.T) { + mkRI := func(count int, avg float64) common.Recommendation { + return common.Recommendation{ + Service: common.ServiceEC2, + Region: "us-east-1", + ResourceType: "t3.medium", + Count: count, + CommitmentType: common.CommitmentReservedInstance, + AverageInstancesUsedPerHour: avg, + } + } + + tests := []struct { + name string + rec common.Recommendation + wantDropped bool + wantCount int + }{ + // avg=0.999 → floor(0.999)=0 → drop. + {name: "target 100, avg=0.999 → drop (avg<1)", rec: mkRI(5, 0.999), wantDropped: true}, + // avg=1.0 → buy 1 (matches avg demand). + {name: "target 100, avg=1 → buy 1", rec: mkRI(5, 1.0), wantCount: 1}, + // avg=8.7 → floor(8.7) = 8. + {name: "target 100, avg=8.7 → buy floor(avg)=8", rec: mkRI(10, 8.7), wantCount: 8}, + // avg=10 → buy 10 (avg=10 means 10 concurrent instances on average). + {name: "target 100, avg=10 → buy 10", rec: mkRI(10, 10.0), wantCount: 10}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + out := ApplyTargetCoverage([]common.Recommendation{tt.rec}, 100) + if tt.wantDropped { + assert.Len(t, out, 0, "expected drop at target=100 for avg=%.3f", tt.rec.AverageInstancesUsedPerHour) + return + } + require.Len(t, out, 1) + assert.Equal(t, tt.wantCount, out[0].Count) + }) + } +} + +// TestApplyTargetCoverage_SP_NoSignalGuards covers the two SP no-signal +// branches: RecommendedUtilization <= 0 (already covered by other tests) and +// the new HourlyCommitment <= 0 guard (CE occasionally returns $0 +// placeholder recs). +func TestApplyTargetCoverage_SP_NoSignalGuards(t *testing.T) { + t.Run("HourlyCommitment=0 with positive RecommendedUtilization → pass through unscaled", func(t *testing.T) { + rec := common.Recommendation{ + Service: common.ServiceSavingsPlansCompute, + CommitmentType: common.CommitmentSavingsPlan, + EstimatedSavings: 1500, + RecommendedUtilization: 50, + Details: &common.SavingsPlanDetails{HourlyCommitment: 0}, + } + out := ApplyTargetCoverage([]common.Recommendation{rec}, 80) + require.Len(t, out, 1, "$0 SP rec should still be in output (pass-through)") + // Pass-through — projection fields must NOT be set, savings unchanged. + assert.Equal(t, 0.0, out[0].ProjectedUtilization, "ProjectedUtilization must NOT be set for $0-commitment pass-through") + assert.Equal(t, 1500.0, out[0].EstimatedSavings, "EstimatedSavings unchanged on pass-through") + assert.Equal(t, 0.0, out[0].Details.(*common.SavingsPlanDetails).HourlyCommitment, "HourlyCommitment unchanged") + }) + + t.Run("Details is wrong type → pass through unscaled, no projection metric set", func(t *testing.T) { + // Defensive case: SP rec with non-SP Details (a parser bug). The + // scaling can't proceed, and we MUST NOT set ProjectedUtilization + // to target% because the underlying cost fields aren't scaled — + // that would mislead the operator into thinking the rec was sized + // to the target when in fact it's the original unscaled commitment. + rec := common.Recommendation{ + Service: common.ServiceSavingsPlansCompute, + CommitmentType: common.CommitmentSavingsPlan, + EstimatedSavings: 1500, + RecommendedUtilization: 50, + Details: common.ComputeDetails{Platform: "Linux/UNIX"}, // wrong type + } + out := ApplyTargetCoverage([]common.Recommendation{rec}, 80) + require.Len(t, out, 1) + assert.Equal(t, 0.0, out[0].ProjectedUtilization, "must NOT set projection when scaling failed") + assert.Equal(t, 1500.0, out[0].EstimatedSavings, "EstimatedSavings must remain unscaled when scaling failed") + }) +} diff --git a/cmd/main.go b/cmd/main.go index 1571dff2..6a41b47b 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -33,10 +33,17 @@ const ( // Config holds all configuration for the RI helper tool type Config struct { - Providers []string - Regions []string - Services []string - Coverage float64 + Providers []string + Regions []string + Services []string + Coverage float64 + // TargetCoverage, when > 0, switches sizing from --coverage's + // rec.Count-scaling to under-buy against historical hourly usage: + // each rec is sized to floor(avg * TargetCoverage/100), leaving + // (100-TargetCoverage)% of historical demand on-demand. Mutually + // exclusive with --coverage (target wins, with an info log). See + // cmd/helpers.go: ApplyTargetCoverage. + TargetCoverage float64 ActualPurchase bool CSVOutput string CSVInput string @@ -67,6 +74,19 @@ type Config struct { MinSavingsPct float64 MaxBreakEvenMonths int MinCount int + // RebuyWindowDays, when > 0, treats existing RIs whose remaining term + // is at most this many days as already uncovered, so --target-coverage + // recommends replacements before they expire. Zero (default) keeps the + // strict per-pool subtraction — existing coverage is fully trusted + // regardless of when it expires. + RebuyWindowDays int + // MinPoolSize, when > 0, drops RI recommendations for pools whose + // AverageInstancesUsedPerHour is below this threshold. Used to avoid + // the integer-arithmetic over-cover problem on tiny pools (avg < 5 + // can't approximate target=80% without buying enough RIs to hit + // 100% coverage). Zero (default) keeps all pools. SPs and recs + // without a per-hour signal pass through unfiltered. + MinPoolSize float64 } func main() { @@ -92,6 +112,11 @@ func init() { rootCmd.Flags().StringSliceVarP(&toolCfg.Services, "services", "s", []string{"rds"}, "Services to process (rds, elasticache, ec2, opensearch, redshift, memorydb, savingsplans)") rootCmd.Flags().BoolVar(&toolCfg.AllServices, "all-services", false, "Process all supported services") rootCmd.Flags().Float64VarP(&toolCfg.Coverage, "coverage", "c", 80.0, "Percentage of recommendations to purchase (0-100)") + rootCmd.Flags().Float64VarP(&toolCfg.TargetCoverage, "target-coverage", "u", 0, + "Target % (0-100) of historical avg-hourly usage to cover with commitments. "+ + "When >0, sizes each rec to floor(avg * target/100) so projected coverage "+ + "approximates target and projected utilization stays near 100%, leaving "+ + "(100-target)% on-demand headroom. Overrides --coverage. Default 0 = disabled.") rootCmd.Flags().BoolVar(&toolCfg.ActualPurchase, "purchase", false, "Actually purchase RIs instead of just printing the data") rootCmd.Flags().StringVarP(&toolCfg.CSVOutput, "output", "o", "", "Output CSV file path (if not specified, auto-generates filename)") rootCmd.Flags().StringVarP(&toolCfg.CSVInput, "input-csv", "i", "", "Input CSV file with recommendations to purchase") @@ -125,6 +150,15 @@ func init() { rootCmd.Flags().Float64Var(&toolCfg.MinSavingsPct, "min-savings-pct", 0, "Minimum savings percentage to include a recommendation (0 = no filter)") rootCmd.Flags().IntVar(&toolCfg.MaxBreakEvenMonths, "max-break-even-months", 0, "Maximum break-even period in months (0 = no filter)") rootCmd.Flags().IntVar(&toolCfg.MinCount, "min-count", 0, "Minimum instance count to include a recommendation (0 = no filter)") + rootCmd.Flags().IntVar(&toolCfg.RebuyWindowDays, "rebuy-window-days", 0, + "When >0, treat existing RIs expiring within this many days as already "+ + "uncovered, so --target-coverage sizes recommendations to replace them "+ + "before they expire. Default 0 = trust existing coverage fully.") + rootCmd.Flags().Float64Var(&toolCfg.MinPoolSize, "min-pool-size", 0, + "When >0, drop RI recommendations for pools with AverageInstancesUsedPerHour "+ + "below this threshold. Useful with --target-coverage to skip tiny pools "+ + "that integer arithmetic forces above target (e.g. avg=1 cannot hit 80%%). "+ + "Default 0 = no filter.") } // Package-level Config that cobra flags bind to @@ -238,8 +272,22 @@ func createServiceClient(service common.ServiceType, cfg aws.Config) provider.Se } } -// generatePurchaseID creates a descriptive purchase ID with UUID for uniqueness -func generatePurchaseID(rec common.Recommendation, region string, _ int, isDryRun bool, coverage float64) string { +// effectiveSizingPct returns the sizing % actually applied to recommendations: +// cfg.TargetCoverage when set (>0), else cfg.Coverage. Use this when +// emitting human-facing labels (purchase IDs, audit-log fields) so the label +// reflects the value that drove the sizing, not the unused default. +func effectiveSizingPct(cfg Config) float64 { + if cfg.TargetCoverage > 0 { + return cfg.TargetCoverage + } + return cfg.Coverage +} + +// generatePurchaseID creates a descriptive purchase ID with UUID for uniqueness. +// sizingPct is the percentage that actually drove the sizing decision (see +// effectiveSizingPct); it appears in the ID as e.g. "80pct" purely for human +// readability and audit traceability. +func generatePurchaseID(rec common.Recommendation, region string, _ int, isDryRun bool, sizingPct float64) string { // Generate a short UUID suffix (first 8 characters) for uniqueness uuidSuffix := uuid.New().String()[:8] timestamp := time.Now().Format("20060102-150405") @@ -268,7 +316,7 @@ func generatePurchaseID(rec common.Recommendation, region string, _ int, isDryRu // Add account name if available accountName := sanitizeAccountName(rec.AccountName) - coveragePct := fmt.Sprintf("%.0fpct", coverage) + coveragePct := fmt.Sprintf("%.0fpct", sizingPct) if accountName != "" { if engine != "" { return fmt.Sprintf("%s-%s-%s-%s-%s-%s-%dx-%s-%s-%s", diff --git a/cmd/main_test.go b/cmd/main_test.go index 4e3ec887..79fdbd7b 100644 --- a/cmd/main_test.go +++ b/cmd/main_test.go @@ -636,6 +636,30 @@ func TestGeneratePurchaseIDCoverageVariations(t *testing.T) { } } +// TestEffectiveSizingPct locks the rule that target-coverage wins over +// coverage when both are configured. Without this, dry-run purchase IDs (and +// any other audit label that calls effectiveSizingPct) print the unused +// default Coverage=80 instead of the actual target value the user passed. +// Regression guard for the Aurora-target / RDS-target CLI dry-run runs. +func TestEffectiveSizingPct(t *testing.T) { + tests := []struct { + name string + cfg Config + want float64 + }{ + {"target unset, coverage default", Config{Coverage: 80}, 80}, + {"target unset, coverage custom", Config{Coverage: 50}, 50}, + {"target set, coverage default ignored", Config{TargetCoverage: 70, Coverage: 80}, 70}, + {"target set, coverage explicit ignored", Config{TargetCoverage: 95, Coverage: 30}, 95}, + {"target zero falls back to coverage", Config{TargetCoverage: 0, Coverage: 60}, 60}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, effectiveSizingPct(tt.cfg)) + }) + } +} + func TestParseServicesWithEmptyAndNil(t *testing.T) { // Empty slice result := parseServices([]string{}) diff --git a/cmd/multi_service.go b/cmd/multi_service.go index f8d5fa67..35f5c4e7 100644 --- a/cmd/multi_service.go +++ b/cmd/multi_service.go @@ -14,11 +14,57 @@ import ( "github.com/LeanerCloud/CUDly/pkg/provider" "github.com/LeanerCloud/CUDly/pkg/scorer" awsprovider "github.com/LeanerCloud/CUDly/providers/aws" + "github.com/LeanerCloud/CUDly/providers/aws/recommendations" "github.com/aws/aws-sdk-go-v2/aws" awsconfig "github.com/aws/aws-sdk-go-v2/config" "github.com/google/uuid" ) +// existingCoverageLookbackDays is the historical window we ask CE to +// summarise existing-RI coverage over for --target-coverage sizing. +// 30 days matches CE's UI default and the GetRIUtilization caller. +const existingCoverageLookbackDays = 30 + +// fetchExistingCoverage retrieves the existing-RI coverage map from Cost +// Explorer so --target-coverage sizing can subtract what's already owned in +// each pool. Best-effort: a transient CE failure logs a warning and returns +// an empty map, which the sizing path treats as "no signal" — recs sized +// without subtracting existing commitments. Skipping the fetch entirely +// when --target-coverage is not in play avoids the per-region CE charges +// for users on the --coverage path. +// +// Coverage is fetched per-region per-account so CE's org-wide aggregate +// doesn't bleed one account's coverage into another in multi-account orgs. +// Regions come from cfg.Regions if set, otherwise from EC2 DescribeRegions. +func fetchExistingCoverage(ctx context.Context, awsCfg aws.Config, recClient provider.RecommendationsClient, cfg Config) recommendations.PoolCoverageMap { + if cfg.TargetCoverage <= 0 { + return nil + } + adapter, ok := recClient.(*awsprovider.RecommendationsClientAdapter) + if !ok { + // Non-AWS provider: feature not wired up. Sizing degenerates to + // the no-existing-commitments path. + return nil + } + regions := cfg.Regions + if len(regions) == 0 { + allRegions, err := getAllAWSRegions(ctx, awsCfg) + if err != nil { + AppLogger.Printf(" ⚠️ Could not list AWS regions for coverage fetch (%v); skipping existing-coverage subtraction\n", err) + return nil + } + regions = allRegions + } + AppLogger.Printf("\n🔎 Fetching existing-RI coverage from Cost Explorer per-account across %d regions (lookback %d days)...\n", len(regions), existingCoverageLookbackDays) + cov, err := adapter.GetRICoverageMap(ctx, existingCoverageLookbackDays, regions) + if err != nil { + AppLogger.Printf(" ⚠️ Could not fetch existing-RI coverage (%v); sizing will assume zero existing coverage\n", err) + return nil + } + AppLogger.Printf(" ✅ Fetched coverage for %d (region, instance-type, engine, account) entries\n", len(cov)) + return cov +} + // shutdownRequested is set to true when SIGINT is received during a purchase run. var shutdownRequested atomic.Bool @@ -63,9 +109,16 @@ func runToolMultiService(ctx context.Context, cfg Config) { recClient := awsprovider.NewRecommendationsClient(awsCfg) engineData := fetchEngineVersionData(ctx, cfg) + // Fetch existing-RI coverage so --target-coverage can subtract what + // the user already owns. Best-effort: a failure here logs a warning + // and continues with an empty map, which makes sizing degenerate to + // the no-existing-commitments path (matches behavior when no recs + // are matched in the map). + coverageMap := fetchExistingCoverage(ctx, awsCfg, recClient, cfg) + // Phase 1: collect all recommendations without purchasing. AppLogger.Printf("\n📥 Fetching recommendations from all services...\n") - allRecs := fetchAllRecs(ctx, awsCfg, recClient, accountCache, servicesToProcess, engineData, cfg) + allRecs := fetchAllRecs(ctx, awsCfg, recClient, accountCache, servicesToProcess, engineData, cfg, coverageMap) // Phase 2: score and display. scoredResult := scoreAndDisplay(allRecs, cfg) @@ -343,11 +396,18 @@ func filterAndAdjustRecommendations(recommendations []common.Recommendation, csv AppLogger.Printf("🔍 After filters: %d recommendations (filtered out %d)\n", len(recommendations), originalCount-len(recommendations)) } - // Apply coverage if not 100% - if csvModeCoverage < 100 { - beforeCoverage := len(recommendations) - recommendations = applyCommonCoverage(recommendations, csvModeCoverage) - AppLogger.Printf("📈 Applying %.1f%% coverage: %d recommendations selected (from %d)\n", csvModeCoverage, len(recommendations), beforeCoverage) + // Apply sizing — target-coverage if set, otherwise coverage. + // Coverage 100% is a no-op (early-returned inside ApplyCoverage), but + // --target-coverage always applies even at coverage 100%, so the + // CSV-path short-circuit is conditional on TargetCoverage == 0. + if cfg.TargetCoverage > 0 || csvModeCoverage < 100 { + beforeSize := len(recommendations) + recommendations = applySizing(recommendations, cfg, csvModeCoverage) + if cfg.TargetCoverage > 0 { + AppLogger.Printf("🎯 Applying %.1f%% target-coverage: %d recommendations selected (from %d)\n", cfg.TargetCoverage, len(recommendations), beforeSize) + } else { + AppLogger.Printf("📈 Applying %.1f%% coverage: %d recommendations selected (from %d)\n", csvModeCoverage, len(recommendations), beforeSize) + } } // Apply count override if specified @@ -380,10 +440,13 @@ func processService(ctx context.Context, awsCfg aws.Config, recClient provider.R serviceResults := make([]common.PurchaseResult, 0) for i, region := range regionsToProcess { + // Legacy single-service entry point — no coverage map is fetched here, + // so sizing falls back to the no-existing-commitments formula. The new + // path (runToolMultiService) fetches coverage once and threads it through. regionResult := processRegionRecommendations( ctx, awsCfg, recClient, accountCache, service, region, i+1, len(regionsToProcess), - engineData, isDryRun, cfg, + engineData, isDryRun, cfg, nil, ) serviceRecs = append(serviceRecs, regionResult.recommendations...) serviceResults = append(serviceResults, regionResult.results...) diff --git a/cmd/multi_service_csv.go b/cmd/multi_service_csv.go index 4cb8682d..4ea720fd 100644 --- a/cmd/multi_service_csv.go +++ b/cmd/multi_service_csv.go @@ -6,9 +6,11 @@ import ( "io" "log" "os" + "sort" "time" "github.com/LeanerCloud/CUDly/pkg/common" + "github.com/LeanerCloud/CUDly/providers/aws/recommendations" ) // determineCSVCoverage determines the coverage percentage to use for CSV mode @@ -166,18 +168,44 @@ func writeMultiServiceCSVReport(results []common.PurchaseResult, filepath string writer := csv.NewWriter(file) defer writer.Flush() - // Write header + // Write header. RecommendedCount shows AWS's pre-sizing count alongside + // Count (the post-sizing value); UpfrontPayment is rec.CommitmentCost, + // which ApplyCoverage / ApplyTargetCoverage now scale at sizing time so + // the value already reflects the sized purchase. ExistingCoverage shows + // the % of demand already covered by commitments in the same pool (from + // CE GetReservationCoverage); ProjectedCoverage shows where the purchase + // landed (total coverage after adding the new RIs). All four optional + // columns render blank when zero so users on the straight --coverage + // path don't see noise. ProjectedUtilization and RecommendedUtilization + // are NOT emitted because under under-buy sizing both land at ~100% on + // every row, which adds noise without information; the underlying fields + // stay on the Recommendation struct for internal use (SP no-signal + // guard, etc.). header := []string{ - "Service", "Region", "ResourceType", "Count", "Account", "AccountName", - "Term", "PaymentOption", "EstimatedSavings", "CommitmentID", - "Success", "Error", "Timestamp", + "Service", "Region", "ResourceType", "Family", "Engine", "Deployment", + "Instances", "CoveredInstances", + "Count", "NormalizedUnits", "RecommendedCount", + "Account", "AccountName", "Term", "PaymentOption", + "UpfrontPayment", "RecurringMonthlyCost", "EstimatedSavings", + "CommitmentID", "Success", "Error", "Timestamp", + "ExistingCoverage", "ProjectedCoverage", } if err := writer.Write(header); err != nil { return fmt.Errorf("failed to write CSV header: %w", err) } - // Write data rows - for _, r := range results { + // Sort by upfront DESC so the biggest-dollar decisions surface at + // the top of the file rather than wherever AWS rec API happened to + // return them. Operators reading top-down see the rows that matter + // most for budget review first. Copy the slice so the caller's + // ordering isn't mutated (some callers iterate results twice). + sorted := make([]common.PurchaseResult, len(results)) + copy(sorted, results) + sort.SliceStable(sorted, func(i, j int) bool { + return sorted[i].Recommendation.CommitmentCost > sorted[j].Recommendation.CommitmentCost + }) + + for _, r := range sorted { rec := r.Recommendation errStr := "" if r.Error != nil { @@ -188,21 +216,259 @@ func writeMultiServiceCSVReport(results []common.PurchaseResult, filepath string string(rec.Service), rec.Region, rec.ResourceType, + extractRDSFamily(rec), + extractEngine(rec), + extractDeployment(rec), + formatAvgInstancesOrBlank(rec.AverageInstancesUsedPerHour), + formatCoveredInstancesOrBlank(rec), fmt.Sprintf("%d", rec.Count), + formatNormalizedUnitsOrBlank(rec), + formatIntOrBlank(rec.RecommendedCount), rec.Account, rec.AccountName, rec.Term, rec.PaymentOption, + formatCurrencyOrBlank(rec.CommitmentCost), + formatRecurringMonthlyOrBlank(rec.RecurringMonthlyCost), fmt.Sprintf("%.2f", rec.EstimatedSavings), r.CommitmentID, fmt.Sprintf("%t", r.Success), errStr, r.Timestamp.Format(time.RFC3339), + formatExistingCoverage(rec), + formatPercentOrBlank(rec.ProjectedCoverage), } if err := writer.Write(row); err != nil { return fmt.Errorf("failed to write CSV row: %w", err) } } + // TOTAL row aggregates the sum-able fields (Count, NormalizedUnits, + // UpfrontPayment, RecurringMonthlyCost, EstimatedSavings) so operators + // don't have to recompute in a spreadsheet. The "TOTAL" label lands in + // the Service column for easy spotting; columns that don't aggregate + // meaningfully (per-rec identifiers, timestamps, %) stay blank. + if len(sorted) > 0 { + totalRow := buildTotalRow(sorted) + if err := writer.Write(totalRow); err != nil { + return fmt.Errorf("failed to write CSV total row: %w", err) + } + } + return nil } + +// buildTotalRow sums the count + currency columns across results and +// returns a row aligned to the same header order as writeCSVRowsOrdered. +// Non-summable cells (per-rec identifiers, percentages, timestamps) are +// blank; the "TOTAL" label lands in Service so the row reads as a +// summary at first glance. +func buildTotalRow(results []common.PurchaseResult) []string { + var totalCount int + var totalNU, totalUpfront, totalRecurring, totalSavings float64 + hasRecurring := false + for _, r := range results { + totalCount += r.Recommendation.Count + totalNU += float64(r.Recommendation.Count) * recommendations.RDSInstanceNUFromType(r.Recommendation.ResourceType) + totalUpfront += r.Recommendation.CommitmentCost + totalSavings += r.Recommendation.EstimatedSavings + if r.Recommendation.RecurringMonthlyCost != nil { + totalRecurring += *r.Recommendation.RecurringMonthlyCost + hasRecurring = true + } + } + recurringCell := "" + if hasRecurring { + recurringCell = fmt.Sprintf("%.2f", totalRecurring) + } + nuCell := "" + if totalNU > 0 { + nuCell = fmt.Sprintf("%g", totalNU) + } + return []string{ + "TOTAL", "", "", "", "", "", // Service through Deployment + "", "", // Instances, CoveredInstances + fmt.Sprintf("%d", totalCount), nuCell, "", // Count, NormalizedUnits, RecommendedCount + "", "", "", "", // Account, AccountName, Term, PaymentOption + fmt.Sprintf("%.2f", totalUpfront), recurringCell, fmt.Sprintf("%.2f", totalSavings), + "", "", "", "", // CommitmentID, Success, Error, Timestamp + "", "", // ExistingCoverage, ProjectedCoverage + } +} + +// formatIntOrBlank renders an int as its decimal string when non-zero, "" +// otherwise. SP recommendations leave RecommendedCount at zero (SPs are +// dollar-denominated, not count-denominated), so blanking matches the +// "0 = unknown / not applicable" convention used elsewhere in the CSV. +func formatIntOrBlank(v int) string { + if v == 0 { + return "" + } + return fmt.Sprintf("%d", v) +} + +// extractRDSFamily returns the RDS instance-family prefix (e.g. +// "db.r7g") for an RDS recommendation, empty for any service whose +// instance type doesn't follow the RDS three-part naming. Useful for +// grouping rows in the CSV by family-NU bucket so operators can see at +// a glance which recs belong to the same size-flex family. +func extractRDSFamily(rec common.Recommendation) string { + if rec.Service != common.ServiceRDS && rec.Service != common.ServiceRelationalDB { + return "" + } + return recommendations.RDSFamilyFromType(rec.ResourceType) +} + +// formatNormalizedUnitsOrBlank renders the per-rec NU contribution +// (rec.Count × NU(size)) for RDS rows: e.g. 15 × db.r7g.large = 60 NU. +// Surfaces the size-flex math AWS rec API uses to bundle family demand +// into a single rec at one size — without this column, operators have +// to compute NU by hand to verify the bundling. Renders blank for +// non-RDS rows and for sizes not in the standard NU scale. +func formatNormalizedUnitsOrBlank(rec common.Recommendation) string { + if rec.Service != common.ServiceRDS && rec.Service != common.ServiceRelationalDB { + return "" + } + nu := recommendations.RDSInstanceNUFromType(rec.ResourceType) + if nu == 0 || rec.Count == 0 { + return "" + } + return fmt.Sprintf("%g", float64(rec.Count)*nu) +} + +// extractDeployment returns the RDS deployment-option string +// ("single-az" / "multi-az") for an RDS recommendation, empty for any +// service that doesn't carry a deployment dimension. Critical for RDS +// price verification: Multi-AZ list prices are roughly 2x Single-AZ, so +// operators need to see the deployment alongside the upfront figure to +// confirm a $X upfront row is for the deployment they expect. +// +// Both value and pointer Details are accepted to mirror extractEngine +// (parser path stores pointers; CSV-loader path constructs values). +func extractDeployment(rec common.Recommendation) string { + switch details := rec.Details.(type) { + case *common.DatabaseDetails: + if details != nil { + return details.AZConfig + } + case common.DatabaseDetails: + return details.AZConfig + } + return "" +} + +// extractEngine returns the engine / platform string for a recommendation's +// polymorphic Details: Engine for RDS / ElastiCache (DatabaseDetails, +// CacheDetails), Platform for EC2 (ComputeDetails), empty for SP and other +// commitment types that don't carry an engine field. +// +// Both value and pointer Details are accepted because the parser stores +// *DatabaseDetails / *CacheDetails / *ComputeDetails while the CSV-loader +// path constructs the value forms; the dispatch in generatePurchaseID does +// the same trick. Without the pointer cases the column silently blanks +// every row coming from the live parser path. +func extractEngine(rec common.Recommendation) string { + switch details := rec.Details.(type) { + case *common.DatabaseDetails: + if details != nil { + return details.Engine + } + case common.DatabaseDetails: + return details.Engine + case *common.CacheDetails: + if details != nil { + return details.Engine + } + case common.CacheDetails: + return details.Engine + case *common.ComputeDetails: + if details != nil { + return details.Platform + } + case common.ComputeDetails: + return details.Platform + } + return "" +} + +// formatExistingCoverage renders the existing-RI coverage cell with +// three distinct states: +// - "n/a" when CE returned no data for the rec's pool (rec parser was +// able to surface a recommendation from some other signal but CE's +// coverage view doesn't see the pool yet — e.g. recently-launched +// instances within CUDly's run window but outside CE's lookback) +// - "0.0" when CE confirms the pool exists but has zero RI coverage +// (the legitimate "buy for uncovered demand" case) +// - "X.X" with one decimal for any non-zero coverage percentage +// +// Previously both the no-data and the genuine-zero cases rendered as a +// blank cell, conflating "we don't know" with "definitely zero" and +// making it impossible to spot pools where the CE signal was missing. +func formatExistingCoverage(rec common.Recommendation) string { + if !rec.ExistingCoverageKnown { + return "n/a" + } + return fmt.Sprintf("%.1f", rec.ExistingCoveragePct) +} + +// formatRecurringMonthlyOrBlank renders rec.RecurringMonthlyCost (the +// per-month fee on top of any upfront payment, populated by the AWS +// parser when CE returns RecurringStandardMonthlyCost). Distinguishes +// "no recurring fee" (all-upfront RIs, where the pointer is set to +// zero) from "unknown" (pointer is nil because CE didn't return the +// field): zero renders as "0.00", nil renders as blank. +// +// Operators on partial-upfront / no-upfront plans need this to compute +// total cost (upfront + monthly × 36); without it the CSV only shows +// the upfront portion and over-states ROI. +func formatRecurringMonthlyOrBlank(p *float64) string { + if p == nil { + return "" + } + return fmt.Sprintf("%.2f", *p) +} + +// formatCurrencyOrBlank renders a currency value as "%.2f" when non-zero, +// "" otherwise. Used for UpfrontPayment so a no-upfront / unknown-upfront +// rec renders as a blank cell rather than "$0.00". +func formatCurrencyOrBlank(v float64) string { + if v == 0 { + return "" + } + return fmt.Sprintf("%.2f", v) +} + +// formatAvgInstancesOrBlank renders the average instances-per-hour signal +// (AverageInstancesUsedPerHour from CE) with one decimal so operators can +// see the pool's running demand without losing the fractional precision +// CE returns. Blank when zero, matching the "0 = no signal" convention. +func formatAvgInstancesOrBlank(v float64) string { + if v == 0 { + return "" + } + return fmt.Sprintf("%.1f", v) +} + +// formatCoveredInstancesOrBlank renders the instances in the pool already +// covered by existing commitments: avg × existing_coverage / 100. Useful +// next to Instances so operators can read "you have X running, Y are +// already covered, this rec adds N more" without doing the arithmetic. +// Blank when either signal is zero (we can't compute a meaningful value). +func formatCoveredInstancesOrBlank(rec common.Recommendation) string { + if rec.AverageInstancesUsedPerHour <= 0 || rec.ExistingCoveragePct <= 0 { + return "" + } + covered := rec.AverageInstancesUsedPerHour * rec.ExistingCoveragePct / 100.0 + return fmt.Sprintf("%.1f", covered) +} + +// formatPercentOrBlank renders a % value as "%.1f" when non-zero, "" otherwise. +// Zero means "unknown / not applicable" — we don't want "0.0" in cells where +// the metric simply wasn't computed (e.g. ProjectedCoverage for SP rows, or +// any utilization field when --target-coverage wasn't used). +func formatPercentOrBlank(v float64) string { + if v == 0 { + return "" + } + return fmt.Sprintf("%.1f", v) +} diff --git a/cmd/multi_service_csv_test.go b/cmd/multi_service_csv_test.go index 54684164..ec1144be 100644 --- a/cmd/multi_service_csv_test.go +++ b/cmd/multi_service_csv_test.go @@ -1,9 +1,11 @@ package main import ( + "encoding/csv" "errors" "os" "path/filepath" + "strings" "testing" "time" @@ -186,6 +188,388 @@ func TestWriteMultiServiceCSVReport(t *testing.T) { } } +// TestWriteMultiServiceCSVReport_CoverageColumn confirms the ProjectedCoverage +// and RecommendedCount columns added for --target-coverage (#338) are emitted +// with the "blank-when-zero" formatting (matches the "0 = unknown" convention +// shared with the JSON-level omitempty tags). The sibling ProjectedUtilization +// and RecommendedUtilization fields are intentionally NOT emitted to CSV +// (both land at ~100% on every under-buy row, so they add noise without +// information). +func TestWriteMultiServiceCSVReport_CoverageColumn(t *testing.T) { + tmpDir := t.TempDir() + filepath := tmpDir + "/util.csv" + + results := []common.PurchaseResult{ + { + Recommendation: common.Recommendation{ + Service: common.ServiceEC2, + Region: "us-east-1", + ResourceType: "t3.medium", + Count: 7, // post-sizing + RecommendedCount: 10, // AWS pre-sizing + CommitmentCost: 700, // already scaled at sizing time + Term: "1yr", + ProjectedUtilization: 95.0, + ProjectedCoverage: 87.5, + ExistingCoveragePct: 20.0, + ExistingCoverageKnown: true, + RecommendedUtilization: 80.0, + AverageInstancesUsedPerHour: 10.0, + // Pointer form matches the live parser (parser_services.go + // stores &common.ComputeDetails{...}); extractEngine must + // handle both pointer and value Details. + Details: &common.ComputeDetails{Platform: "Linux/UNIX"}, + }, + Success: true, + }, + { + // All sizing-related fields zero — ProjectedCoverage and + // RecommendedCount cells should both be blank (SP rec or a + // pre-target rec that never went through sizing). UpfrontPayment + // is also blank when CommitmentCost is zero. No Details either, + // so the Engine column is blank. + Recommendation: common.Recommendation{ + Service: common.ServiceEC2, + Region: "us-east-1", + ResourceType: "m5.large", + Count: 5, + Term: "1yr", + }, + Success: true, + }, + } + + err := writeMultiServiceCSVReport(results, filepath) + require.NoError(t, err) + + content, err := os.ReadFile(filepath) + require.NoError(t, err) + csvText := string(content) + + // Header contains ProjectedCoverage, ExistingCoverage, RecommendedCount, + // and UpfrontPayment but NOT the always-100% utilization siblings. + assert.Contains(t, csvText, "ProjectedCoverage") + assert.Contains(t, csvText, "ExistingCoverage") + assert.Contains(t, csvText, "RecommendedCount") + assert.Contains(t, csvText, "UpfrontPayment") + assert.NotContains(t, csvText, "ProjectedUtilization", "column was removed; it's ~100% on every under-buy row") + assert.NotContains(t, csvText, "RecommendedUtilization", "column was removed; it's ~99-100% on every row") + + // First data row (populated rec) has the coverage and AWS-count values. + assert.Contains(t, csvText, "87.5", "ProjectedCoverage should render with one decimal") + + r := csv.NewReader(strings.NewReader(csvText)) + rows, err := r.ReadAll() + require.NoError(t, err) + // Header + 2 data rows + 1 TOTAL row. + require.Len(t, rows, 4) + // TOTAL row sits at the bottom with "TOTAL" in the Service column and + // summed Count / UpfrontPayment / EstimatedSavings. + totalRow := rows[3] + assert.Equal(t, "TOTAL", totalRow[0], "TOTAL label lands in Service column") + // Data rows are sorted by UpfrontPayment DESC; populated rec ($700) + // comes before the empty rec ($0). + header := rows[0] + idxProjCov, idxRecCount, idxUpfront, idxExisting := -1, -1, -1, -1 + idxEngine, idxInstances, idxCovered := -1, -1, -1 + for i, h := range header { + switch h { + case "ProjectedCoverage": + idxProjCov = i + case "RecommendedCount": + idxRecCount = i + case "UpfrontPayment": + idxUpfront = i + case "ExistingCoverage": + idxExisting = i + case "Engine": + idxEngine = i + case "Instances": + idxInstances = i + case "CoveredInstances": + idxCovered = i + } + } + require.NotEqual(t, -1, idxProjCov, "ProjectedCoverage column not found") + require.NotEqual(t, -1, idxRecCount, "RecommendedCount column not found") + require.NotEqual(t, -1, idxUpfront, "UpfrontPayment column not found") + require.NotEqual(t, -1, idxExisting, "ExistingCoverage column not found") + require.NotEqual(t, -1, idxEngine, "Engine column not found") + require.NotEqual(t, -1, idxInstances, "Instances column not found") + require.NotEqual(t, -1, idxCovered, "CoveredInstances column not found") + + // Populated row: RecommendedCount=10 renders as "10", UpfrontPayment + // emits CommitmentCost as-is (sizing already scaled it; see + // ApplyTargetCoverage), ProjectedCoverage=87.5 renders, ExistingCoverage=20.0, + // Engine pulled from *ComputeDetails.Platform. Instances = avg = 10.0. + // CoveredInstances = 10.0 × 20% = 2.0. + populatedRow := rows[1] + assert.Equal(t, "10", populatedRow[idxRecCount], "RecommendedCount should render as decimal") + assert.Equal(t, "700.00", populatedRow[idxUpfront], "UpfrontPayment should render rec.CommitmentCost as-is") + assert.Equal(t, "87.5", populatedRow[idxProjCov]) + assert.Equal(t, "20.0", populatedRow[idxExisting], "ExistingCoverage should render with one decimal") + assert.Equal(t, "Linux/UNIX", populatedRow[idxEngine], "Engine should pull from *ComputeDetails.Platform") + assert.Equal(t, "10.0", populatedRow[idxInstances], "Instances should render avg with one decimal") + assert.Equal(t, "2.0", populatedRow[idxCovered], "CoveredInstances = avg * existing_cov / 100") + + // Zero-fields row: optional cells blank, Engine blank when Details is nil. + // ExistingCoverage shows "n/a" because ExistingCoverageKnown wasn't set: + // CE had no data for this pool (distinct from "0% covered", which would + // be ExistingCoverageKnown=true, Pct=0 rendering as "0.0"). + zeroRow := rows[2] + assert.Equal(t, "", zeroRow[idxProjCov], "zero ProjectedCoverage should be blank") + assert.Equal(t, "", zeroRow[idxRecCount], "zero RecommendedCount should be blank (SP rec or pre-sizing)") + assert.Equal(t, "", zeroRow[idxUpfront], "zero CommitmentCost should leave UpfrontPayment blank") + assert.Equal(t, "n/a", zeroRow[idxExisting], "ExistingCoverage should render n/a when CE had no signal") + assert.Equal(t, "", zeroRow[idxEngine], "missing Details should leave Engine blank") + assert.Equal(t, "", zeroRow[idxInstances], "zero avg should leave Instances blank") + assert.Equal(t, "", zeroRow[idxCovered], "missing avg or existing_cov should leave CoveredInstances blank") +} + +// TestWriteMultiServiceCSVReport_SortAndTotal confirms data rows are +// sorted by UpfrontPayment DESC and that a TOTAL summary row lands at +// the bottom with the column sums. Operators reading the file top-down +// want the biggest-dollar decisions surfaced first; the TOTAL row +// removes the need to copy-paste columns into a spreadsheet to add +// them up. +func TestWriteMultiServiceCSVReport_SortAndTotal(t *testing.T) { + tmpDir := t.TempDir() + fp := tmpDir + "/sort-total.csv" + + // Three recs: $5K, $20K, $1K upfront. After DESC sort the order + // should be 20K, 5K, 1K. + results := []common.PurchaseResult{ + {Recommendation: common.Recommendation{Service: "rds", ResourceType: "db.r6g.large", Count: 5, CommitmentCost: 5000, EstimatedSavings: 500}}, + {Recommendation: common.Recommendation{Service: "rds", ResourceType: "db.r6g.2xlarge", Count: 2, CommitmentCost: 20000, EstimatedSavings: 1500}}, + {Recommendation: common.Recommendation{Service: "rds", ResourceType: "db.t4g.medium", Count: 4, CommitmentCost: 1000, EstimatedSavings: 80}}, + } + require.NoError(t, writeMultiServiceCSVReport(results, fp)) + content, err := os.ReadFile(fp) + require.NoError(t, err) + + r := csv.NewReader(strings.NewReader(string(content))) + rows, err := r.ReadAll() + require.NoError(t, err) + require.Len(t, rows, 5) // header + 3 data + TOTAL + + // Find UpfrontPayment column index. + header := rows[0] + idxUpfront := -1 + idxCount := -1 + idxService := -1 + idxNU := -1 + idxSavings := -1 + for i, h := range header { + switch h { + case "UpfrontPayment": + idxUpfront = i + case "Count": + idxCount = i + case "Service": + idxService = i + case "NormalizedUnits": + idxNU = i + case "EstimatedSavings": + idxSavings = i + } + } + + // Sort order: $20K, $5K, $1K. + assert.Equal(t, "20000.00", rows[1][idxUpfront], "row 1 has the largest upfront") + assert.Equal(t, "5000.00", rows[2][idxUpfront]) + assert.Equal(t, "1000.00", rows[3][idxUpfront]) + + // TOTAL row aggregates: count=11, upfront=$26K, savings=$2,080. + // NU = 5×4 + 2×16 + 4×2 = 20 + 32 + 8 = 60. + totalRow := rows[4] + assert.Equal(t, "TOTAL", totalRow[idxService]) + assert.Equal(t, "11", totalRow[idxCount]) + assert.Equal(t, "60", totalRow[idxNU]) + assert.Equal(t, "26000.00", totalRow[idxUpfront]) + assert.Equal(t, "2080.00", totalRow[idxSavings]) +} + +// TestFormatExistingCoverage locks the three-state rendering: +// - ExistingCoverageKnown=false → "n/a" (CE has no data for this pool) +// - ExistingCoverageKnown=true, Pct=0 → "0.0" (CE confirms zero coverage) +// - ExistingCoverageKnown=true, Pct>0 → formatted with one decimal +// +// Critical for operators interpreting the column: a blank or zero cell +// previously meant either "CE was queried but returned 0%" or "CE +// returned nothing", with no way to tell which. +func TestFormatExistingCoverage(t *testing.T) { + tests := []struct { + name string + rec common.Recommendation + want string + }{ + {"unknown (CE no data)", common.Recommendation{}, "n/a"}, + {"known zero coverage", common.Recommendation{ExistingCoverageKnown: true}, "0.0"}, + {"known partial coverage", common.Recommendation{ExistingCoverageKnown: true, ExistingCoveragePct: 37.74}, "37.7"}, + {"known full coverage", common.Recommendation{ExistingCoverageKnown: true, ExistingCoveragePct: 100.0}, "100.0"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, formatExistingCoverage(tt.rec)) + }) + } +} + +// TestFormatRecurringMonthlyOrBlank locks the nil-vs-zero distinction: +// nil pointer (AWS API didn't return RecurringStandardMonthlyCost) +// renders as blank, zero value (genuinely no monthly fee, e.g. +// all-upfront RIs) renders as "0.00". Operators need to tell "we don't +// know" apart from "definitely zero" to compute total cost correctly. +func TestFormatRecurringMonthlyOrBlank(t *testing.T) { + zero := 0.0 + twenty := 20.5 + tests := []struct { + name string + in *float64 + want string + }{ + {"nil → blank (unknown)", nil, ""}, + {"zero pointer → 0.00 (definitely zero)", &zero, "0.00"}, + {"non-zero → formatted", &twenty, "20.50"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, formatRecurringMonthlyOrBlank(tt.in)) + }) + } +} + +// TestExtractRDSFamily covers the family-prefix extraction used by the +// CSV writer to group rows by size-flex family. +func TestExtractRDSFamily(t *testing.T) { + tests := []struct { + name string + rec common.Recommendation + want string + }{ + {"RDS db.r7g.large", common.Recommendation{Service: common.ServiceRDS, ResourceType: "db.r7g.large"}, "db.r7g"}, + {"RDS db.t4g.medium", common.Recommendation{Service: common.ServiceRDS, ResourceType: "db.t4g.medium"}, "db.t4g"}, + {"RelationalDB alias", common.Recommendation{Service: common.ServiceRelationalDB, ResourceType: "db.m5.xlarge"}, "db.m5"}, + // Non-RDS services blank even when ResourceType looks RDS-shaped. + {"EC2 ignored", common.Recommendation{Service: common.ServiceEC2, ResourceType: "m5.large"}, ""}, + {"ElastiCache ignored", common.Recommendation{Service: common.ServiceElastiCache, ResourceType: "cache.t3.micro"}, ""}, + // Malformed RDS type. + {"RDS bare type", common.Recommendation{Service: common.ServiceRDS, ResourceType: "db.r7g"}, ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, extractRDSFamily(tt.rec)) + }) + } +} + +// TestFormatNormalizedUnitsOrBlank confirms NU values land for RDS rows +// with known sizes and stay blank for non-RDS / zero-count / unknown-size +// inputs, matching the "0/empty = unknown" convention used elsewhere. +func TestFormatNormalizedUnitsOrBlank(t *testing.T) { + tests := []struct { + name string + rec common.Recommendation + want string + }{ + // 15 × db.r7g.large = 15 × 4 NU = 60 NU + {"r7g.large × 15", common.Recommendation{Service: common.ServiceRDS, ResourceType: "db.r7g.large", Count: 15}, "60"}, + // 3 × db.t4g.medium = 3 × 2 NU = 6 NU + {"t4g.medium × 3", common.Recommendation{Service: common.ServiceRDS, ResourceType: "db.t4g.medium", Count: 3}, "6"}, + // Fractional NU survives via %g (db.t4g.micro = 0.5 NU) + {"t4g.micro × 3", common.Recommendation{Service: common.ServiceRDS, ResourceType: "db.t4g.micro", Count: 3}, "1.5"}, + // Non-RDS service → blank + {"EC2 row blank", common.Recommendation{Service: common.ServiceEC2, ResourceType: "m5.large", Count: 5}, ""}, + // Zero count → blank + {"zero count blank", common.Recommendation{Service: common.ServiceRDS, ResourceType: "db.r7g.large", Count: 0}, ""}, + // Unknown size → blank + {"unknown size blank", common.Recommendation{Service: common.ServiceRDS, ResourceType: "db.r7g.bogus", Count: 5}, ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, formatNormalizedUnitsOrBlank(tt.rec)) + }) + } +} + +// TestExtractDeployment covers the deployment-extraction helper used by +// the RDS row in the CSV. Single-AZ / Multi-AZ is critical context for +// pricing verification (Multi-AZ list price is ~2x Single-AZ) so the +// column should land for every RDS rec regardless of which Details form +// the upstream path used. +func TestExtractDeployment(t *testing.T) { + tests := []struct { + name string + rec common.Recommendation + want string + }{ + {"*DatabaseDetails Single-AZ", common.Recommendation{Details: &common.DatabaseDetails{AZConfig: "single-az"}}, "single-az"}, + {"*DatabaseDetails Multi-AZ", common.Recommendation{Details: &common.DatabaseDetails{AZConfig: "multi-az"}}, "multi-az"}, + {"DatabaseDetails (value) Multi-AZ", common.Recommendation{Details: common.DatabaseDetails{AZConfig: "multi-az"}}, "multi-az"}, + {"DatabaseDetails empty AZConfig", common.Recommendation{Details: &common.DatabaseDetails{Engine: "mysql"}}, ""}, + // Non-RDS Details → blank (column is RDS-only data). + {"CacheDetails -> empty", common.Recommendation{Details: &common.CacheDetails{Engine: "redis"}}, ""}, + {"ComputeDetails -> empty", common.Recommendation{Details: &common.ComputeDetails{Platform: "Linux/UNIX"}}, ""}, + {"nil Details -> empty", common.Recommendation{}, ""}, + {"nil *DatabaseDetails -> empty", common.Recommendation{Details: (*common.DatabaseDetails)(nil)}, ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, extractDeployment(tt.rec)) + }) + } +} + +// TestExtractEngine covers the four cases the helper dispatches on: +// DatabaseDetails (RDS engine), CacheDetails (ElastiCache engine), +// ComputeDetails (EC2 platform), and unset/other Details (blank). +func TestExtractEngine(t *testing.T) { + tests := []struct { + name string + rec common.Recommendation + want string + }{ + // Pointer forms — what the live parser actually emits. + {"*DatabaseDetails -> Engine", common.Recommendation{Details: &common.DatabaseDetails{Engine: "aurora-postgresql"}}, "aurora-postgresql"}, + {"*CacheDetails -> Engine", common.Recommendation{Details: &common.CacheDetails{Engine: "redis"}}, "redis"}, + {"*ComputeDetails -> Platform", common.Recommendation{Details: &common.ComputeDetails{Platform: "Linux/UNIX"}}, "Linux/UNIX"}, + // Value forms — what the CSV-loader path constructs. + {"DatabaseDetails (value) -> Engine", common.Recommendation{Details: common.DatabaseDetails{Engine: "mysql"}}, "mysql"}, + {"CacheDetails (value) -> Engine", common.Recommendation{Details: common.CacheDetails{Engine: "memcached"}}, "memcached"}, + {"ComputeDetails (value) -> Platform", common.Recommendation{Details: common.ComputeDetails{Platform: "Windows"}}, "Windows"}, + // Fallbacks. + {"nil Details -> empty", common.Recommendation{}, ""}, + {"SavingsPlanDetails -> empty", common.Recommendation{Details: &common.SavingsPlanDetails{HourlyCommitment: 1.0}}, ""}, + {"nil *DatabaseDetails -> empty", common.Recommendation{Details: (*common.DatabaseDetails)(nil)}, ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, extractEngine(tt.rec)) + }) + } +} + +// TestFormatCurrencyOrBlank locks the blank-when-zero behaviour for the +// UpfrontPayment column. Non-zero renders with two decimals; zero renders +// as an empty cell so users can distinguish "no upfront due" from "actual +// $0 upfront", consistent with the rest of the optional CSV columns. +func TestFormatCurrencyOrBlank(t *testing.T) { + tests := []struct { + name string + in float64 + want string + }{ + {"non-zero renders with two decimals", 1234.56, "1234.56"}, + {"integer value gets .00", 700, "700.00"}, + {"zero blanks the cell", 0, ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, formatCurrencyOrBlank(tt.in)) + }) + } +} + // Tests for loadRecommendationsFromCSV function func TestLoadRecommendationsFromCSV(t *testing.T) { tests := []struct { diff --git a/cmd/multi_service_filters.go b/cmd/multi_service_filters.go index 4f99ae99..da3ffdad 100644 --- a/cmd/multi_service_filters.go +++ b/cmd/multi_service_filters.go @@ -22,7 +22,10 @@ func applyFilters(recs []common.Recommendation, cfg Config, instanceVersions map return filtered } -// processRecommendation applies all filters to a recommendation and returns the adjusted recommendation and whether to include it +// processRecommendation applies all filters to a recommendation and returns +// the adjusted recommendation and whether to include it. The flat +// boolean-filter checks are delegated to passesDimensionFilters to keep +// this function under gocyclo's complexity threshold. func processRecommendation(rec common.Recommendation, cfg Config, instanceVersions map[string][]InstanceEngineVersion, versionInfo map[string]MajorEngineVersionInfo, currentRegion string) (common.Recommendation, bool) { // Filter to only recommendations for the current region being processed // This prevents duplicating recommendations across all regions @@ -31,20 +34,7 @@ func processRecommendation(rec common.Recommendation, cfg Config, instanceVersio return rec, false } - // Apply basic filters - if !shouldIncludeRegion(rec.Region, cfg) { - return rec, false - } - - if !shouldIncludeInstanceType(rec.ResourceType, cfg) { - return rec, false - } - - if !shouldIncludeEngine(rec, cfg) { - return rec, false - } - - if !shouldIncludeAccount(rec.AccountName, cfg) { + if !passesDimensionFilters(rec, cfg) { return rec, false } @@ -60,6 +50,52 @@ func processRecommendation(rec common.Recommendation, cfg Config, instanceVersio return rec, true } +// passesDimensionFilters runs the stateless include/exclude checks on +// region, instance type, engine, account, and pool size. Returns false on +// the first failing filter. Split out of processRecommendation to keep +// each function's cyclomatic complexity under the gocyclo limit; the +// dimension filters here are pure functions of rec + cfg with no side +// effects. +func passesDimensionFilters(rec common.Recommendation, cfg Config) bool { + if !shouldIncludeRegion(rec.Region, cfg) { + return false + } + if !shouldIncludeInstanceType(rec.ResourceType, cfg) { + return false + } + if !shouldIncludeEngine(rec, cfg) { + return false + } + if !shouldIncludeAccount(rec.AccountName, cfg) { + return false + } + if !shouldIncludePoolSize(rec, cfg) { + return false + } + return true +} + +// shouldIncludePoolSize filters out RI recommendations for pools whose +// AverageInstancesUsedPerHour is below cfg.MinPoolSize. The purpose is to +// drop tiny pools where integer-arithmetic sizing forces 100% coverage +// regardless of --target-coverage (e.g. avg=1 with target=80% → floor(0.8)=0 +// drops, ceil(0.8)=1 over-covers). Setting --min-pool-size=2 keeps pools +// where target can be meaningfully approximated. +// +// Pass-through cases: filter disabled (MinPoolSize<=0), or rec has no +// per-hour signal (avg<=0 — SPs and recs CE didn't return usage for). +// Those pools aren't sized via the per-hour formula so the filter doesn't +// apply to them. +func shouldIncludePoolSize(rec common.Recommendation, cfg Config) bool { + if cfg.MinPoolSize <= 0 { + return true + } + if rec.AverageInstancesUsedPerHour <= 0 { + return true + } + return rec.AverageInstancesUsedPerHour >= cfg.MinPoolSize +} + // shouldIncludeRegion checks if a region should be included based on filters func shouldIncludeRegion(region string, cfg Config) bool { // If include list is specified, region must be in it diff --git a/cmd/multi_service_helpers.go b/cmd/multi_service_helpers.go index ead3a450..8118de96 100644 --- a/cmd/multi_service_helpers.go +++ b/cmd/multi_service_helpers.go @@ -10,6 +10,7 @@ import ( "github.com/LeanerCloud/CUDly/pkg/common" "github.com/LeanerCloud/CUDly/pkg/provider" + "github.com/LeanerCloud/CUDly/providers/aws/recommendations" "github.com/aws/aws-sdk-go-v2/aws" awsec2 "github.com/aws/aws-sdk-go-v2/service/ec2" ) @@ -205,7 +206,7 @@ func createDryRunResult(rec common.Recommendation, region string, index int, cfg return common.PurchaseResult{ Recommendation: rec, Success: true, - CommitmentID: generatePurchaseID(rec, region, index, true, cfg.Coverage), + CommitmentID: generatePurchaseID(rec, region, index, true, effectiveSizingPct(cfg)), DryRun: true, Timestamp: time.Now(), } @@ -218,7 +219,7 @@ func createCancelledResults(recs []common.Recommendation, region string, cfg Con results[k] = common.PurchaseResult{ Recommendation: recs[k], Success: false, - CommitmentID: generatePurchaseID(recs[k], region, k+1, false, cfg.Coverage), + CommitmentID: generatePurchaseID(recs[k], region, k+1, false, effectiveSizingPct(cfg)), Error: fmt.Errorf("purchase cancelled by user"), Timestamp: time.Now(), } @@ -236,7 +237,7 @@ func executePurchase(ctx context.Context, rec common.Recommendation, region stri result.Error = err } if result.CommitmentID == "" { - result.CommitmentID = generatePurchaseID(rec, region, index, false, cfg.Coverage) + result.CommitmentID = generatePurchaseID(rec, region, index, false, effectiveSizingPct(cfg)) } return result } @@ -346,6 +347,7 @@ func processRegionRecommendations( engineData engineVersionData, isDryRun bool, cfg Config, + coverageMap recommendations.PoolCoverageMap, ) regionRecommendations { result := regionRecommendations{ recommendations: make([]common.Recommendation, 0), @@ -373,8 +375,11 @@ func processRegionRecommendations( return result } - // Apply coverage and overrides - filteredRecs := applyCoverageAndOverrides(recs, cfg) + // Apply coverage and overrides. This legacy path (test-only) doesn't + // fetch expiring commitments — expiry-aware sizing only runs via the + // main pipeline in fetchAndFilterRegionRecs, which has access to the + // regional service client at the right moment. + filteredRecs := applyCoverageAndOverrides(recs, cfg, coverageMap, nil) result.recommendations = filteredRecs @@ -458,11 +463,45 @@ func applyRegionFilters( return recs } -// applyCoverageAndOverrides applies coverage percentage and count overrides -func applyCoverageAndOverrides(recs []common.Recommendation, cfg Config) []common.Recommendation { - // Apply coverage - filteredRecs := applyCommonCoverage(recs, cfg.Coverage) - AppLogger.Printf(" 📈 Applying %.1f%% coverage: %d recommendations selected\n", cfg.Coverage, len(filteredRecs)) +// applyCoverageAndOverrides applies sizing (coverage % or target-coverage) +// and count overrides. Sizing mode is selected by cfg.TargetCoverage: > 0 +// routes to ApplyTargetCoverage; otherwise the legacy ApplyCoverage path. +// coverageMap (when non-nil) populates Recommendation.ExistingCoveragePct +// before sizing so the under-buy formula can subtract pool coverage already +// owned. Nil map = no-op; recs keep their default zero values and the +// sizing path falls back to the no-existing-commitments formula. +// +// expiringCommitments (when non-empty and cfg.RebuyWindowDays > 0) reduces +// ExistingCoveragePct further by the share of pool demand attributable to +// RIs expiring within the window, so --target-coverage recommends +// replacements before the cliff. Doesn't run if either input is empty. +func applyCoverageAndOverrides(recs []common.Recommendation, cfg Config, coverageMap recommendations.PoolCoverageMap, expiringCommitments []common.Commitment) []common.Recommendation { + recommendations.ApplyCoverageMapToRecommendations(recs, coverageMap) + if cfg.RebuyWindowDays > 0 && len(expiringCommitments) > 0 { + n := recommendations.AdjustExistingCoverageForExpiringCommitments(recs, expiringCommitments, cfg.RebuyWindowDays) + if n > 0 { + AppLogger.Printf(" ⏰ Treating %d recs as partially uncovered (RIs expiring within %d days)\n", n, cfg.RebuyWindowDays) + } + } + // Family-NU sizing for RDS recs: AWS rec API already bundles size-flex + // demand within a family into one rec at one size, so per-pool sizing + // under-buys. Run the family-NU pass first (RDS only, target-coverage + // mode only); non-RDS recs flow through the per-pool path unchanged. + // When TargetCoverage isn't set (legacy --coverage path) the per-pool + // flow handles everything as before. + var sizedRDS []common.Recommendation + rest := recs + if cfg.TargetCoverage > 0 { + sizedRDS, rest = recommendations.ApplyFamilyNUSizingRDS(recs, coverageMap, cfg.TargetCoverage) + } + filteredRecs := applySizing(rest, cfg, cfg.Coverage) + filteredRecs = append(filteredRecs, sizedRDS...) + if cfg.TargetCoverage > 0 { + AppLogger.Printf(" 🎯 Applying %.1f%% target-coverage: %d recommendations selected (%d via family-NU, %d via per-pool)\n", + cfg.TargetCoverage, len(filteredRecs), len(sizedRDS), len(filteredRecs)-len(sizedRDS)) + } else { + AppLogger.Printf(" 📈 Applying %.1f%% coverage: %d recommendations selected\n", cfg.Coverage, len(filteredRecs)) + } // Apply count override if specified if cfg.OverrideCount > 0 { @@ -509,6 +548,8 @@ func checkDuplicatesAndApplyLimit( // fetchAndFilterRegionRecs fetches, filters, applies coverage, and deduplicates // recommendations for a single service+region. No purchases are made. +// coverageMap (when non-nil) feeds existing-pool coverage into the sizing +// step for --target-coverage. func fetchAndFilterRegionRecs( ctx context.Context, awsCfg aws.Config, @@ -519,6 +560,7 @@ func fetchAndFilterRegionRecs( regionIndex, totalRegions int, engineData engineVersionData, cfg Config, + coverageMap recommendations.PoolCoverageMap, ) []common.Recommendation { AppLogger.Printf("\n 📍 [%d/%d] Region: %s\n", regionIndex, totalRegions, region) @@ -536,12 +578,29 @@ func fetchAndFilterRegionRecs( return nil } - recs = applyCoverageAndOverrides(recs, cfg) - - // Deduplication: skip recs matching recently-purchased commitments + // Build the regional service client once and reuse for both expiry-aware + // coverage adjustment and the downstream duplicate check. regionalCfg := awsCfg.Copy() regionalCfg.Region = region serviceClient := createServiceClient(service, regionalCfg) + + // Fetch existing commitments up front when --rebuy-window-days is set so + // the sizing step can deduct expiring RIs from ExistingCoveragePct. + // Best-effort: a failure here logs and continues; expiry adjustment + // becomes a no-op for this region. + var expiringCommitments []common.Commitment + if cfg.RebuyWindowDays > 0 && serviceClient != nil { + commits, err := serviceClient.GetExistingCommitments(ctx) + if err != nil { + AppLogger.Printf(" ⚠️ Could not fetch existing commitments for expiry check: %v\n", err) + } else { + expiringCommitments = commits + } + } + + recs = applyCoverageAndOverrides(recs, cfg, coverageMap, expiringCommitments) + + // Deduplication: skip recs matching recently-purchased commitments. if serviceClient != nil { recs = checkDuplicatesAndApplyLimit(ctx, recs, serviceClient, cfg) } @@ -549,7 +608,10 @@ func fetchAndFilterRegionRecs( return recs } -// fetchAllRecs collects recommendations from all services and regions without purchasing. +// fetchAllRecs collects recommendations from all services and regions without +// purchasing. coverageMap (when non-nil) populates Recommendation.ExistingCoveragePct +// on each rec before sizing, so --target-coverage can subtract what's already +// owned in the same pool. func fetchAllRecs( ctx context.Context, awsCfg aws.Config, @@ -558,6 +620,7 @@ func fetchAllRecs( servicesToProcess []common.ServiceType, engineData engineVersionData, cfg Config, + coverageMap recommendations.PoolCoverageMap, ) []common.Recommendation { all := make([]common.Recommendation, 0) for _, service := range servicesToProcess { @@ -571,7 +634,7 @@ func fetchAllRecs( continue } for i, region := range regions { - recs := fetchAndFilterRegionRecs(ctx, awsCfg, recClient, accountCache, service, region, i+1, len(regions), engineData, cfg) + recs := fetchAndFilterRegionRecs(ctx, awsCfg, recClient, accountCache, service, region, i+1, len(regions), engineData, cfg, coverageMap) all = append(all, recs...) } } diff --git a/cmd/validators.go b/cmd/validators.go index 9753a0b4..30de41b5 100644 --- a/cmd/validators.go +++ b/cmd/validators.go @@ -13,7 +13,7 @@ import ( // validateFlags performs validation on command line flags before execution func validateFlags(cmd *cobra.Command, args []string) error { - if err := validateNumericRanges(); err != nil { + if err := validateNumericRanges(cmd); err != nil { return err } @@ -32,13 +32,20 @@ func validateFlags(cmd *cobra.Command, args []string) error { return nil } -// validateNumericRanges validates all numeric configuration values -func validateNumericRanges() error { +// validateNumericRanges validates all numeric configuration values. +// cmd is used to detect explicitly-set flags via cobra's Changed(); pass +// nil if no flag-source-detection is required (e.g. unit-test paths that +// only need the numeric bounds checks). +func validateNumericRanges(cmd *cobra.Command) error { // Validate coverage percentage if toolCfg.Coverage < 0 || toolCfg.Coverage > 100 { return fmt.Errorf("coverage percentage must be between 0 and 100, got: %.2f", toolCfg.Coverage) } + if err := validateTargetCoverage(cmd); err != nil { + return err + } + // Validate max instances if toolCfg.MaxInstances < 0 { return fmt.Errorf("max-instances must be 0 (no limit) or a positive number, got: %d", toolCfg.MaxInstances) @@ -60,6 +67,26 @@ func validateNumericRanges() error { return nil } +// validateTargetCoverage validates the --target-coverage range and +// emits an info-log when it is set alongside an explicitly-overridden +// --coverage (target wins). Split out of validateNumericRanges to keep +// the parent under gocyclo's complexity threshold. +func validateTargetCoverage(cmd *cobra.Command) error { + if toolCfg.TargetCoverage < 0 || toolCfg.TargetCoverage > 100 { + return fmt.Errorf("target-coverage percentage must be between 0 and 100, got: %.2f", toolCfg.TargetCoverage) + } + + // Info-log when both flags are explicitly set — target-coverage wins. + // Detect "user explicitly set --coverage" via cobra's Changed() rather than + // comparing to the default value, so a user who happens to set --coverage 80 + // (which equals the default) still sees the notice. + if toolCfg.TargetCoverage > 0 && cmd != nil && cmd.Flags().Changed("coverage") { + log.Printf("--target-coverage=%.1f set; --coverage=%.1f is being ignored (target-coverage sizing supersedes coverage sizing)", + toolCfg.TargetCoverage, toolCfg.Coverage) + } + return nil +} + // validatePaymentAndTerm validates payment options and term configuration func validatePaymentAndTerm() error { // Validate payment option diff --git a/cmd/validators_test.go b/cmd/validators_test.go index 4fb7e37b..f5b7378c 100644 --- a/cmd/validators_test.go +++ b/cmd/validators_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/LeanerCloud/CUDly/pkg/common" + "github.com/spf13/cobra" ) func TestValidateNumericRanges(t *testing.T) { @@ -87,8 +88,10 @@ func TestValidateNumericRanges(t *testing.T) { toolCfg = Config{} tt.setupFunc() - // Execute - err := validateNumericRanges() + // Execute (nil cmd — these tests only exercise the numeric + // bounds checks; the flag-source-detection branch is covered + // by TestValidateTargetCoverage below). + err := validateNumericRanges(nil) // Verify if tt.wantErr { @@ -411,3 +414,74 @@ func TestValidateNoConflicts(t *testing.T) { } // Note: TestValidateInstanceTypes and TestValidateFlags already exist in main_test.go + +// TestValidateTargetCoverage covers the --target-coverage range check +// and the "both flags explicitly set" info-log gate. Range cases pass nil +// cmd (the explicit-flag detection is irrelevant for them); the "both set" +// case constructs a real cobra command and marks --coverage as explicitly +// set so the Changed("coverage") branch actually fires. The log line +// itself isn't asserted (log.Printf goes to stderr and capturing it from +// this package is more friction than value). +func TestValidateTargetCoverage(t *testing.T) { + tests := []struct { + name string + target float64 + coverage float64 + wantErr bool + errSubstr string + // useCobraCmd controls whether the test builds a real cobra command + // with --coverage marked as Changed, exercising the precedence-log + // gate. False keeps the nil-cmd shortcut for pure range checks. + useCobraCmd bool + }{ + {name: "disabled (zero) is valid", target: 0, coverage: 80, wantErr: false}, + {name: "min boundary valid", target: 0.0001, coverage: 80, wantErr: false}, + {name: "max boundary valid", target: 100, coverage: 80, wantErr: false}, + {name: "mid-range valid", target: 95, coverage: 80, wantErr: false}, + { + name: "negative target rejected", target: -0.5, coverage: 80, wantErr: true, + errSubstr: "target-coverage percentage must be between 0 and 100", + }, + { + name: "above 100 rejected", target: 100.01, coverage: 80, wantErr: true, + errSubstr: "target-coverage percentage must be between 0 and 100", + }, + { + // Both flags explicitly set — the precedence info-log fires; + // validation still passes. useCobraCmd builds a real command so + // Changed("coverage") returns true; without this the branch is + // effectively dead code in the test. + name: "target + coverage both set, both valid", + target: 95, + coverage: 50, + wantErr: false, + useCobraCmd: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + toolCfg = Config{Coverage: tt.coverage, TargetCoverage: tt.target} + var cmd *cobra.Command + if tt.useCobraCmd { + cmd = &cobra.Command{Use: "test"} + // Default doesn't matter — the Set call below marks the flag + // as Changed, which is what validateTargetCoverage checks. + cmd.Flags().Float64("coverage", 80, "") + if err := cmd.Flags().Set("coverage", "50"); err != nil { + t.Fatalf("failed to mark coverage flag as Changed: %v", err) + } + } + err := validateNumericRanges(cmd) + if tt.wantErr { + if err == nil { + t.Errorf("validateNumericRanges() expected error containing %q, got nil", tt.errSubstr) + } else if tt.errSubstr != "" && !strings.Contains(err.Error(), tt.errSubstr) { + t.Errorf("validateNumericRanges() error = %v, want substring %q", err, tt.errSubstr) + } + } else if err != nil { + t.Errorf("validateNumericRanges() unexpected error = %v", err) + } + }) + } +} diff --git a/internal/reporter/reporter.go b/internal/reporter/reporter.go index 14014082..dde5a56c 100644 --- a/internal/reporter/reporter.go +++ b/internal/reporter/reporter.go @@ -82,6 +82,10 @@ func RenderExcluded(result scorer.ScoredResult) string { // RenderSummary returns a one-paragraph summary: total estimated savings and cost for // passed recommendations, plus count of filtered recommendations with a reason breakdown. +// +// EstimatedSavings is monthly: sourced from AWS's EstimatedMonthlySavingsAmount +// (see providers/aws/recommendations/parser_ri.go). CommitmentCost is the +// upfront portion (one-time), so the two figures are NOT on the same timescale. func RenderSummary(result scorer.ScoredResult) string { var totalSavings, totalCost float64 for _, rec := range result.Passed { @@ -90,7 +94,7 @@ func RenderSummary(result scorer.ScoredResult) string { } var sb strings.Builder - fmt.Fprintf(&sb, "Passed: %d recommendations — estimated savings $%.2f/yr, commitment cost $%.2f\n", + fmt.Fprintf(&sb, "Passed: %d recommendations — estimated savings $%.2f/mo, upfront commitment $%.2f\n", len(result.Passed), totalSavings, totalCost) if len(result.Filtered) > 0 { diff --git a/pkg/common/types.go b/pkg/common/types.go index 0a7d5c84..1440ef72 100644 --- a/pkg/common/types.go +++ b/pkg/common/types.go @@ -166,6 +166,36 @@ type Recommendation struct { // Break-even in months (populated by cloud parsers where available; used by scorer filter) BreakEvenMonths float64 `json:"break_even_months,omitempty" csv:"BreakEvenMonths"` + // Utilization signals — populated by cloud parsers when the API exposes them. + // Used by --target-coverage sizing (see cmd/helpers.go: ApplyTargetCoverage). + // AverageInstancesUsedPerHour is RI-only (zero for SPs and other commitment types). + // RecommendedUtilization is "what AWS projects for the full recommendation" + // (%). ProjectedUtilization / ProjectedCoverage are populated by the sizing + // step after we pick our own quantity. + // RecommendedCount is AWS's pre-sizing count (mirrors Count before + // ApplyCoverage / ApplyTargetCoverage mutates Count); zero for SPs since + // the SP commitment is dollar-denominated rather than count-denominated. + // ExistingCoveragePct is the share of demand already covered by existing + // commitments in the same pool (from CE GetReservationCoverage / + // GetSavingsPlansCoverage). Zero = "no signal" (CE returned nothing for + // this pool, or the fetch step wasn't run); sizing then degenerates to + // the no-existing-commitments path. See cmd/helpers.go. + AverageInstancesUsedPerHour float64 `json:"average_instances_used_per_hour,omitempty" csv:"AverageInstancesUsedPerHour"` + RecommendedUtilization float64 `json:"recommended_utilization,omitempty" csv:"RecommendedUtilization"` + RecommendedCount int `json:"recommended_count,omitempty" csv:"RecommendedCount"` + ExistingCoveragePct float64 `json:"existing_coverage_pct,omitempty" csv:"ExistingCoveragePct"` + // ExistingCoverageKnown distinguishes "CE returned a value for this + // pool" (Known=true, Pct possibly 0.0 meaning the pool has running + // instances but no RI coverage yet) from "CE has no data for this + // pool" (Known=false, Pct=0.0 by default). Set by + // ApplyCoverageMapToRecommendations whenever a pool lookup hits, and + // by family-NU sizing when a family-level existing% lands on the rec. + // CSV writers use this to render "n/a" for unknown vs "0.0" for + // genuine zero-coverage pools. + ExistingCoverageKnown bool `json:"existing_coverage_known,omitempty" csv:"-"` + ProjectedUtilization float64 `json:"projected_utilization,omitempty" csv:"ProjectedUtilization"` + ProjectedCoverage float64 `json:"projected_coverage,omitempty" csv:"ProjectedCoverage"` + // RawRecommendation holds the original cloud API response bytes for audit/debugging. // omitempty ensures nil is absent from JSON (not written as null). RawRecommendation json.RawMessage `json:"raw_recommendation,omitempty" csv:"-"` @@ -177,6 +207,24 @@ type ServiceDetails interface { GetDetailDescription() string } +// ScaleRecommendationCosts multiplies all cost-bearing fields of rec by +// ratio and returns the result. RecurringMonthlyCost is allocated as a +// new pointer when present so callers don't mutate the upstream rec's +// pointer target. Used by sizing paths (ApplyCoverage, ApplyTargetCoverage, +// family-NU) to keep Count and cost in sync when a recommendation is +// sized down (or up) from AWS's proposal — without this helper the same +// four-field scaling pattern was duplicated at every sizing site. +func ScaleRecommendationCosts(rec Recommendation, ratio float64) Recommendation { + rec.CommitmentCost *= ratio + rec.OnDemandCost *= ratio + rec.EstimatedSavings *= ratio + if rec.RecurringMonthlyCost != nil { + scaled := *rec.RecurringMonthlyCost * ratio + rec.RecurringMonthlyCost = &scaled + } + return rec +} + // PurchaseResult represents the outcome of a commitment purchase type PurchaseResult struct { Recommendation Recommendation `json:"recommendation"` @@ -225,7 +273,16 @@ func NormalizeSource(s string) (string, error) { } } -// Commitment represents an existing commitment (RI/SP/CUD/etc) +// Commitment represents an existing commitment (RI/SP/CUD/etc). +// +// Deployment is RDS-specific (Multi-AZ vs Single-AZ); it stays empty for +// services that don't have a deployment dimension (EC2, ElastiCache, +// etc). When populated, it carries the same vocabulary as +// DatabaseDetails.AZConfig on Recommendation ("single-az" / "multi-az") +// so pool-key matching can collapse both sides via normaliseDeployment +// in the recommendations package. Without this field, RDS expiry +// adjustments would silently miss because Recommendation lookup keys +// are deployment-aware while commitment keys defaulted to empty. type Commitment struct { Provider ProviderType `json:"provider"` Account string `json:"account"` @@ -234,7 +291,8 @@ type Commitment struct { Service ServiceType `json:"service"` Region string `json:"region"` ResourceType string `json:"resource_type"` - Engine string `json:"engine,omitempty"` // Database engine for RDS/ElastiCache (e.g., "mysql", "aurora-postgresql") + Engine string `json:"engine,omitempty"` // Database engine for RDS/ElastiCache (e.g., "mysql", "aurora-postgresql") + Deployment string `json:"deployment,omitempty"` // RDS Multi-AZ vs Single-AZ; empty for non-RDS Count int `json:"count"` StartDate time.Time `json:"start_date"` EndDate time.Time `json:"end_date"` diff --git a/providers/aws/recommendations/client.go b/providers/aws/recommendations/client.go index 31d4121f..efc7ed31 100644 --- a/providers/aws/recommendations/client.go +++ b/providers/aws/recommendations/client.go @@ -20,6 +20,7 @@ type CostExplorerAPI interface { GetReservationPurchaseRecommendation(ctx context.Context, params *costexplorer.GetReservationPurchaseRecommendationInput, optFns ...func(*costexplorer.Options)) (*costexplorer.GetReservationPurchaseRecommendationOutput, error) GetSavingsPlansPurchaseRecommendation(ctx context.Context, params *costexplorer.GetSavingsPlansPurchaseRecommendationInput, optFns ...func(*costexplorer.Options)) (*costexplorer.GetSavingsPlansPurchaseRecommendationOutput, error) GetReservationUtilization(ctx context.Context, params *costexplorer.GetReservationUtilizationInput, optFns ...func(*costexplorer.Options)) (*costexplorer.GetReservationUtilizationOutput, error) + GetReservationCoverage(ctx context.Context, params *costexplorer.GetReservationCoverageInput, optFns ...func(*costexplorer.Options)) (*costexplorer.GetReservationCoverageOutput, error) } // Client wraps the AWS Cost Explorer client for RI recommendations diff --git a/providers/aws/recommendations/client_test.go b/providers/aws/recommendations/client_test.go index c99c3e3b..a8ddee1e 100644 --- a/providers/aws/recommendations/client_test.go +++ b/providers/aws/recommendations/client_test.go @@ -47,6 +47,10 @@ func (m *mockCostExplorerAPI) GetReservationUtilization(ctx context.Context, par return &costexplorer.GetReservationUtilizationOutput{}, nil } +func (m *mockCostExplorerAPI) GetReservationCoverage(ctx context.Context, params *costexplorer.GetReservationCoverageInput, optFns ...func(*costexplorer.Options)) (*costexplorer.GetReservationCoverageOutput, error) { + return &costexplorer.GetReservationCoverageOutput{}, nil +} + func TestNewClient(t *testing.T) { cfg := aws.Config{ Region: "us-west-2", diff --git a/providers/aws/recommendations/coverage.go b/providers/aws/recommendations/coverage.go new file mode 100644 index 00000000..415a7101 --- /dev/null +++ b/providers/aws/recommendations/coverage.go @@ -0,0 +1,475 @@ +package recommendations + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/costexplorer" + "github.com/aws/aws-sdk-go-v2/service/costexplorer/types" + + "github.com/LeanerCloud/CUDly/pkg/common" +) + +// coverageServiceFilters lists the Cost Explorer service-dimension values +// we issue GetReservationCoverage against. CE returns only EC2 coverage when +// no SERVICE filter is set, so any account that runs RDS/ElastiCache/ +// OpenSearch/Redshift RIs needs a per-service call. The service strings here +// match CE's canonical dimension values (case-sensitive). +// +// RDS gets special handling — same instance type often runs multiple engines +// in the same region, and the AWS console coverage report further splits each +// engine row by deployment option (Single-AZ vs Multi-AZ), so we fan out +// across engines via the DATABASE_ENGINE filter and pull DEPLOYMENT_OPTION +// into the GroupBy alongside INSTANCE_TYPE. +var coverageServiceFilters = []string{ + "Amazon Elastic Compute Cloud - Compute", // EC2 + "Amazon ElastiCache", // ElastiCache + "Amazon OpenSearch Service", // OpenSearch + "Amazon Redshift", // Redshift + "Amazon MemoryDB", // MemoryDB +} + +// rdsCoverageEngines enumerates CE's DATABASE_ENGINE dimension values for +// RDS recommendations. CUDly's parser normalises engine names to lowercase +// shorthand (e.g. "aurora-postgresql") for rec.Details.Engine; CE expects +// the human-readable form here (e.g. "Aurora PostgreSQL"). normaliseRDSEngine +// canonicalises both sides to the same lookup key. +var rdsCoverageEngines = []string{ + "MySQL", + "PostgreSQL", + "MariaDB", + "Oracle", + "SQL Server", + "Aurora MySQL", + "Aurora PostgreSQL", +} + +const rdsServiceFilter = "Amazon Relational Database Service" + +// PoolCoverage captures the two CE coverage signals we use downstream: the +// share of historical demand already covered by existing reservations +// (Pct, 0-100) and the average concurrent-instances figure derived from +// TotalRunningHours over the lookback window (AvgInstancesPerHour). +// +// AvgInstancesPerHour matches the "Total running hours / window hours" +// figure shown in the AWS console reservations-coverage report; it's what +// --target-coverage sizing now anchors on (linear in avg × gap%) so a +// pool's CUDly buy lines up with the same math operators see in the +// console CSV. Zero means CE returned no running hours for the pool over +// the window — sizing then falls back to rec.AverageInstancesUsedPerHour +// from the rec parser (per-account signal from +// GetReservationPurchaseRecommendation). +type PoolCoverage struct { + Pct float64 + AvgInstancesPerHour float64 +} + +// PoolCoverageMap maps a pool key to the (pct, avg) pair for that pool. +// Used by --target-coverage sizing to subtract existing commitments from +// the under-buy formula and to size linearly off the org-wide demand. +// Keys are org-wide (no account dimension) to match the AWS console's +// reservations-coverage report: non-RDS keys are "region:instance_type"; +// RDS keys are "region:instance_type:engine:deployment" so Single-AZ vs +// Multi-AZ pools stay distinct (a Single-AZ RI cannot cover a Multi-AZ +// instance). +type PoolCoverageMap map[string]PoolCoverage + +// poolKey returns the canonical non-RDS lookup key for a (region, +// instance_type) tuple. Both inputs are lower-cased so callers don't have +// to normalise case at every site. +func poolKey(region, instanceType string) string { + return strings.ToLower(region) + ":" + strings.ToLower(instanceType) +} + +// rdsPoolKey returns the engine + deployment-aware lookup key for an RDS +// pool. CE's DATABASE_ENGINE dimension uses human-readable strings +// ("Aurora MySQL"), while CUDly's parser stores the shorthand on +// rec.Details.Engine ("aurora-mysql"). DEPLOYMENT_OPTION ("Single-AZ" / +// "Multi-AZ") similarly maps to the parser's AZConfig ("single-az" / +// "multi-az"). Both sides normalise to the same lookup key here so the +// producer (per-engine fetcher) and consumer (apply helper) agree. +func rdsPoolKey(region, instanceType, engine, deployment string) string { + return strings.ToLower(region) + ":" + + strings.ToLower(instanceType) + ":" + + normaliseRDSEngine(engine) + ":" + + normaliseDeployment(deployment) +} + +// normaliseRDSEngine canonicalises an engine string from either CE's +// display form ("Aurora MySQL", "PostgreSQL", "SQL Server") or the +// shorter forms returned by RDS APIs ("aurora-mysql", "postgres", +// "sqlserver-ee", "oracle-se2") to a single lowercase no-spaces +// representation. Strips spaces / hyphens / underscores AND collapses +// well-known short-form aliases plus edition suffixes so both producer +// (per-engine CE fetcher seeded from rdsCoverageEngines) and consumer +// (apply helper reading rec.Details.Engine) land on the same key +// regardless of which API surfaced the engine string. +// +// Specifically: +// - "postgres" collapses to "postgresql" (the bare RDS engine slug +// vs the CE display form). +// - "sqlserver-ee" / "sqlserver-se" / "sqlserver-ex" / "sqlserver-web" +// all collapse to "sqlserver" (CE returns the bare display form +// "SQL Server" while RDS Describe* APIs return edition-suffixed +// slugs). +// - "oracle-ee" / "oracle-se" / "oracle-se1" / "oracle-se2" collapse +// to "oracle". +// +// Without the alias collapse, a rec whose parser saw "postgres" (or an +// Oracle/SQL-Server edition slug) would miss the coverage entry the +// per-engine fetcher wrote under the display-form key, silently +// dropping ExistingCoveragePct to zero and over-buying for that pool. +func normaliseRDSEngine(engine string) string { + s := strings.ToLower(engine) + s = strings.ReplaceAll(s, " ", "") + s = strings.ReplaceAll(s, "-", "") + s = strings.ReplaceAll(s, "_", "") + if s == "postgres" { + return "postgresql" + } + for _, family := range []string{"sqlserver", "oracle"} { + if strings.HasPrefix(s, family) { + return family + } + } + return s +} + +// normaliseDeployment canonicalises a deployment-option string from either +// CE's "Single-AZ" / "Multi-AZ" form or the parser's "single-az" / +// "multi-az" form to a single lowercase no-spaces representation. The +// "Multi-AZ (readable standbys)" variant collapses to a distinct value +// ("multiazreadablestandbys") since it's a distinct RI scope. +// +// An empty input yields "" (not "unknown") so missing-deployment recs +// produce a deterministic miss in the lookup map rather than colliding +// with a real bucket. +func normaliseDeployment(deployment string) string { + s := strings.ToLower(deployment) + s = strings.ReplaceAll(s, " ", "") + s = strings.ReplaceAll(s, "-", "") + s = strings.ReplaceAll(s, "_", "") + s = strings.ReplaceAll(s, "(", "") + s = strings.ReplaceAll(s, ")", "") + return s +} + +// GetRICoverageMap fetches existing-RI coverage % and avg-running-instances +// over the last lookbackDays days, returning a map keyed by org-wide pool +// (no account dimension — matches the AWS console reservations-coverage +// report). Operators wiring the result back onto Recommendations should +// call ApplyCoverageMapToRecommendations rather than walking the map +// manually. +// +// Non-RDS calls group by INSTANCE_TYPE alone (REGION + SERVICE in the +// Filter). RDS calls group by INSTANCE_TYPE + DEPLOYMENT_OPTION (REGION + +// SERVICE + DATABASE_ENGINE in the Filter) so the resulting keys split +// Single-AZ vs Multi-AZ pools, which have separate RI scopes. +// +// Missing pools (no demand in the pool over the window) are omitted from +// the map; ApplyCoverageMapToRecommendations leaves +// rec.ExistingCoveragePct at zero for those recs, which the sizing path +// treats as "no signal" and falls back to the no-existing-commitments +// formula. +func (c *Client) GetRICoverageMap(ctx context.Context, lookbackDays int, regions []string) (PoolCoverageMap, error) { + if lookbackDays <= 0 { + lookbackDays = 30 + } + end := time.Now().UTC() + start := end.AddDate(0, 0, -lookbackDays) + startStr := start.Format("2006-01-02") + endStr := end.Format("2006-01-02") + windowHours := float64(lookbackDays * 24) + + out := make(PoolCoverageMap) + // Cost: one CE call per (service, region) for non-RDS; one CE call per + // (engine, region) for RDS. For a 5-service × 23-region survey + 7 + // engines × 23 regions ≈ 270 calls (~$2.70/run). Empty regions return + // quickly so the bound is loose in practice. + for _, region := range regions { + for _, service := range coverageServiceFilters { + if err := c.fetchCoverageForServiceRegion(ctx, startStr, endStr, windowHours, service, region, out); err != nil { + return nil, fmt.Errorf("fetching coverage for service %q region %q: %w", service, region, err) + } + } + for _, engine := range rdsCoverageEngines { + if err := c.fetchCoverageForRDSEngineRegion(ctx, startStr, endStr, windowHours, engine, region, out); err != nil { + return nil, fmt.Errorf("fetching coverage for RDS engine %q region %q: %w", engine, region, err) + } + } + } + return out, nil +} + +// fetchCoverageForRDSEngineRegion fetches coverage for one (RDS engine, +// region) tuple. Writes entries keyed by +// "region:instance_type:engine:deployment" so the apply helper looks up +// per-engine per-deployment coverage. This solves both the cross-engine +// bleed (db.r7g.large with heavy Aurora coverage looking covered for +// MySQL too) and the cross-deployment bleed (a Single-AZ RI being +// credited against Multi-AZ demand). +func (c *Client) fetchCoverageForRDSEngineRegion(ctx context.Context, startStr, endStr string, windowHours float64, engine, region string, out PoolCoverageMap) error { + input := &costexplorer.GetReservationCoverageInput{ + TimePeriod: &types.DateInterval{Start: aws.String(startStr), End: aws.String(endStr)}, + GroupBy: []types.GroupDefinition{ + {Type: types.GroupDefinitionTypeDimension, Key: aws.String(string(types.DimensionInstanceType))}, + {Type: types.GroupDefinitionTypeDimension, Key: aws.String(string(types.DimensionDeploymentOption))}, + }, + Filter: rdsEngineRegionFilter(engine, region), + Metrics: []string{"Hour"}, + } + return c.fetchCoveragePaged(ctx, input, func(instType, deployment string, cov PoolCoverage) { + out[rdsPoolKey(region, instType, engine, deployment)] = cov + }, windowHours) +} + +// fetchCoverageForServiceRegion fetches coverage for one (non-RDS +// service, region) tuple. Writes entries keyed by "region:instance_type" +// — non-RDS services don't have an engine or deployment split worth +// tracking the way RDS does, so we group by INSTANCE_TYPE alone. +// +// "Hour" tells CE to include the Hour coverage block in the response; +// CoverageHoursPercentage + TotalRunningHours inside that block are what +// we actually parse. HoursPercentage isn't a valid Metrics value (CE +// rejects it with ValidationException) — Metrics names the block, the +// percentage / hours fields are computed and included automatically. +func (c *Client) fetchCoverageForServiceRegion(ctx context.Context, startStr, endStr string, windowHours float64, service, region string, out PoolCoverageMap) error { + input := &costexplorer.GetReservationCoverageInput{ + TimePeriod: &types.DateInterval{Start: aws.String(startStr), End: aws.String(endStr)}, + GroupBy: []types.GroupDefinition{ + {Type: types.GroupDefinitionTypeDimension, Key: aws.String(string(types.DimensionInstanceType))}, + }, + Filter: serviceRegionFilter(service, region), + Metrics: []string{"Hour"}, + } + return c.fetchCoveragePaged(ctx, input, func(instType, _ string, cov PoolCoverage) { + out[poolKey(region, instType)] = cov + }, windowHours) +} + +// fetchCoveragePaged runs the paginated GetReservationCoverage loop and +// invokes record on each group with a non-empty INSTANCE_TYPE and a +// valid Coverage block. The keyed-write logic is callsite-specific +// (RDS keys carry engine + deployment, non-RDS keys don't), so record +// closes over the key shape the caller wants. +func (c *Client) fetchCoveragePaged( + ctx context.Context, + input *costexplorer.GetReservationCoverageInput, + record func(instType, deployment string, cov PoolCoverage), + windowHours float64, +) error { + var token *string + for { + input.NextPageToken = token + result, err := c.fetchCoveragePage(ctx, input) + if err != nil { + return err + } + for _, period := range result.CoveragesByTime { + for _, group := range period.Groups { + instType, deployment := extractGroupAttributes(group.Attributes) + if instType == "" { + continue + } + cov, ok := poolCoverageFromGroup(group, windowHours) + if !ok { + continue + } + record(instType, deployment, cov) + } + } + if result.NextPageToken == nil || *result.NextPageToken == "" { + return nil + } + token = result.NextPageToken + } +} + +// rdsEngineRegionFilter builds the CE Filter expression scoping a +// GetReservationCoverage call to a single (RDS engine, region) tuple. +// Extracted so the fetch path doesn't bury the filter shape inside the +// input struct literal. +func rdsEngineRegionFilter(engine, region string) *types.Expression { + return &types.Expression{ + And: []types.Expression{ + {Dimensions: &types.DimensionValues{Key: types.DimensionService, Values: []string{rdsServiceFilter}}}, + {Dimensions: &types.DimensionValues{Key: types.DimensionDatabaseEngine, Values: []string{engine}}}, + {Dimensions: &types.DimensionValues{Key: types.DimensionRegion, Values: []string{region}}}, + }, + } +} + +// serviceRegionFilter builds the CE Filter expression scoping a +// GetReservationCoverage call to a single (service, region) tuple +// (non-RDS variant: no DATABASE_ENGINE dimension). +func serviceRegionFilter(service, region string) *types.Expression { + return &types.Expression{ + And: []types.Expression{ + {Dimensions: &types.DimensionValues{Key: types.DimensionService, Values: []string{service}}}, + {Dimensions: &types.DimensionValues{Key: types.DimensionRegion, Values: []string{region}}}, + }, + } +} + +// fetchCoveragePage calls the Cost Explorer API with rate-limit retry. +// Mirrors fetchUtilizationPage so the two paths fail and back off the +// same way. +func (c *Client) fetchCoveragePage(ctx context.Context, input *costexplorer.GetReservationCoverageInput) (*costexplorer.GetReservationCoverageOutput, error) { + c.rateLimiter.Reset() + for { + if waitErr := c.rateLimiter.Wait(ctx); waitErr != nil { + return nil, fmt.Errorf("rate limiter wait failed: %w", waitErr) + } + result, err := c.costExplorerClient.GetReservationCoverage(ctx, input) + if !c.rateLimiter.ShouldRetry(err) { + if err != nil { + return nil, fmt.Errorf("failed to get reservation coverage: %w", err) + } + return result, nil + } + } +} + +// poolCoverageFromGroup pulls the (pct, avg) pair from a CE response +// group's CoverageHours block. Returns (zero-value, false) when the block +// is absent or missing the percentage field — caller drops the group. +// AvgInstancesPerHour = TotalRunningHours / windowHours, matching the AWS +// console's "instances over the lookback window" view. windowHours is +// 24 * lookbackDays from the GetRICoverageMap caller. +func poolCoverageFromGroup(group types.ReservationCoverageGroup, windowHours float64) (PoolCoverage, bool) { + if group.Coverage == nil || group.Coverage.CoverageHours == nil || + group.Coverage.CoverageHours.CoverageHoursPercentage == nil { + return PoolCoverage{}, false + } + pct := parseFloat(aws.ToString(group.Coverage.CoverageHours.CoverageHoursPercentage)) + var avg float64 + if windowHours > 0 && group.Coverage.CoverageHours.TotalRunningHours != nil { + avg = parseFloat(aws.ToString(group.Coverage.CoverageHours.TotalRunningHours)) / windowHours + } + return PoolCoverage{Pct: pct, AvgInstancesPerHour: avg}, true +} + +// extractGroupAttributes reads INSTANCE_TYPE and DEPLOYMENT_OPTION values +// from CE's Attributes map. CE sends keys in camelCase ("instanceType", +// "deploymentOption") even though the GroupBy input expects +// SCREAMING_SNAKE_CASE ("INSTANCE_TYPE", "DEPLOYMENT_OPTION"); normalise +// both sides by stripping underscores and lower-casing before comparing. +// Returns empty strings for absent dimensions — the deployment slot is +// optional (non-RDS callers don't group by it and won't pass it through). +func extractGroupAttributes(attrs map[string]string) (instanceType, deployment string) { + for k, v := range attrs { + switch strings.ToLower(strings.ReplaceAll(k, "_", "")) { + case "instancetype": + instanceType = strings.ToLower(v) + case "deploymentoption": + deployment = v + } + } + return instanceType, deployment +} + +// ApplyCoverageMapToRecommendations sets ExistingCoveragePct on each rec +// whose pool key appears in the map, and rebalances each rec's +// AverageInstancesUsedPerHour so that the per-pool sum across recs equals +// the coverage map's org-wide AvgInstancesPerHour for the pool. Pool key +// shape depends on service: RDS recs (DatabaseDetails carrying an engine +// + AZConfig) look up by "region:instance_type:engine:deployment"; other +// services look up by "region:instance_type". Recs without a match stay +// at zero, which the sizing path treats as "no signal" and falls back to +// the no-existing-commitments formula. +// +// Why rebalance instead of just trusting per-rec avgs: AWS's +// GetReservationPurchaseRecommendation returns one rec per (pool, account) +// where it sees demand worth recommending. When AWS rec API only returns +// per-account avgs that sum to less than what CE coverage reports +// org-wide for the same pool (some linked accounts in the pool aren't +// surfaced as recs), per-rec sizing under-buys for the pool as a whole. +// Rebalancing scales each rec's avg by (cov.avg / sum-of-rec-avgs-in-pool) +// so the sized per-rec purchases sum to what the coverage CSV's gap math +// implies. When AWS rec API already matches coverage, the scale factor is +// ~1.0 and behaviour is unchanged. When multiple recs have zero avg +// (no per-account signal at all), the coverage avg is split evenly across +// them so the total still lines up. +// +// Mutates recs in place to mirror the way the sizing pipeline already +// hands recs around by value within each loop iteration. +func ApplyCoverageMapToRecommendations(recs []common.Recommendation, coverage PoolCoverageMap) { + if len(coverage) == 0 { + return + } + // First pass: index recs by pool key and sum per-account avgs so we + // can compute the scaling factor needed to land the per-pool total on + // the coverage's org-wide avg. + poolIdx := make(map[string][]int) + poolAvgSum := make(map[string]float64) + for i := range recs { + key := lookupPoolKey(recs[i]) + poolIdx[key] = append(poolIdx[key], i) + if recs[i].AverageInstancesUsedPerHour > 0 { + poolAvgSum[key] += recs[i].AverageInstancesUsedPerHour + } + } + // Second pass: set ExistingCoveragePct + scale AverageInstancesUsedPerHour + // per rec so the pool's recs sum to cov.AvgInstancesPerHour. + for i := range recs { + key := lookupPoolKey(recs[i]) + cov, ok := coverage[key] + if !ok { + continue + } + recs[i].ExistingCoveragePct = cov.Pct + recs[i].ExistingCoverageKnown = true + if cov.AvgInstancesPerHour <= 0 { + // No org-wide avg signal — leave rec.avg as-is (rec API's + // per-account number is the only signal we have for sizing). + continue + } + if recSum := poolAvgSum[key]; recSum > 0 { + // Scale this rec's per-account avg by its proportional share + // of the org-wide avg. When AWS rec API's per-account total + // matches CE coverage, scale ≈ 1 (no-op). When AWS rec API + // under-counts (some accounts not surfaced), scale > 1 and + // each rec's sized buy grows proportionally so the per-pool + // sum lands on the coverage figure. + recs[i].AverageInstancesUsedPerHour *= cov.AvgInstancesPerHour / recSum + } else { + // Every rec in this pool came back with zero avg — split the + // coverage's avg evenly across them. Equal distribution is + // the only fair choice when there's no per-account signal to + // weight by. + recs[i].AverageInstancesUsedPerHour = cov.AvgInstancesPerHour / float64(len(poolIdx[key])) + } + } +} + +// lookupPoolKey returns the pool key for a recommendation. RDS uses the +// engine + deployment-aware form; non-RDS uses the region+type form. Keys +// are org-wide (no account dimension) so the same pool seen from any +// linked account looks up the same coverage entry — matches the AWS +// console reservations-coverage report. +func lookupPoolKey(rec common.Recommendation) string { + if engine, deployment := rdsEngineDeploymentFromRec(rec); engine != "" { + return rdsPoolKey(rec.Region, rec.ResourceType, engine, deployment) + } + return poolKey(rec.Region, rec.ResourceType) +} + +// rdsEngineDeploymentFromRec extracts the RDS engine and deployment +// strings from a recommendation's polymorphic Details, returning ("", "") +// when the rec isn't an RDS rec. Handles both pointer and value forms of +// DatabaseDetails because the live parser uses pointers and the CSV +// loader uses values. +func rdsEngineDeploymentFromRec(rec common.Recommendation) (engine, deployment string) { + switch details := rec.Details.(type) { + case *common.DatabaseDetails: + if details != nil { + return details.Engine, details.AZConfig + } + case common.DatabaseDetails: + return details.Engine, details.AZConfig + } + return "", "" +} diff --git a/providers/aws/recommendations/coverage_test.go b/providers/aws/recommendations/coverage_test.go new file mode 100644 index 00000000..ac2df72c --- /dev/null +++ b/providers/aws/recommendations/coverage_test.go @@ -0,0 +1,382 @@ +package recommendations + +import ( + "context" + "testing" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/costexplorer" + "github.com/aws/aws-sdk-go-v2/service/costexplorer/types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/LeanerCloud/CUDly/pkg/common" +) + +// mockCoverageCE extends the test mock with a configurable GetReservationCoverage +// response so the coverage path can be exercised without hitting AWS. +type mockCoverageCE struct { + mockCostExplorerAPI + coverageOutput *costexplorer.GetReservationCoverageOutput + coverageError error + coverageCalls int +} + +func (m *mockCoverageCE) GetReservationCoverage(ctx context.Context, params *costexplorer.GetReservationCoverageInput, optFns ...func(*costexplorer.Options)) (*costexplorer.GetReservationCoverageOutput, error) { + m.coverageCalls++ + if m.coverageError != nil { + return nil, m.coverageError + } + return m.coverageOutput, nil +} + +// TestGetRICoverageMap_GroupsByInstanceType confirms a single-page CE +// response is parsed into the expected pool-keyed map and that absent +// attribute keys are skipped rather than producing zero-valued entries. +// Coverage is org-wide (no account dimension) — matches the AWS console +// reservations-coverage report shape. +func TestGetRICoverageMap_GroupsByInstanceType(t *testing.T) { + mock := &mockCoverageCE{ + coverageOutput: &costexplorer.GetReservationCoverageOutput{ + CoveragesByTime: []types.CoverageByTime{ + { + Groups: []types.ReservationCoverageGroup{ + { + Attributes: map[string]string{ + "instanceType": "db.r6g.large", + }, + Coverage: &types.Coverage{ + CoverageHours: &types.CoverageHours{ + CoverageHoursPercentage: aws.String("50.0"), + // 720h in a 30-day window = avg 1 instance. + TotalRunningHours: aws.String("720"), + }, + }, + }, + { + // SCREAMING_SNAKE_CASE fallback path: CE returns + // camelCase keys in practice but the parser tolerates + // either form. + Attributes: map[string]string{ + "INSTANCE_TYPE": "db.m5.xlarge", + }, + Coverage: &types.Coverage{ + CoverageHours: &types.CoverageHours{ + CoverageHoursPercentage: aws.String("33.7"), + // 7200h in a 30-day window = avg 10 instances. + TotalRunningHours: aws.String("7200"), + }, + }, + }, + { + // Missing INSTANCE_TYPE attribute → skipped. + Attributes: map[string]string{}, + Coverage: &types.Coverage{ + CoverageHours: &types.CoverageHours{ + CoverageHoursPercentage: aws.String("99.9"), + }, + }, + }, + }, + }, + }, + }, + } + client := NewClientWithAPI(mock, "us-east-1") + + regions := []string{"us-east-1", "eu-west-2"} + got, err := client.GetRICoverageMap(context.Background(), 30, regions) + require.NoError(t, err) + // One call per (region, service) for non-RDS + one per (region, RDS engine). + // Total = len(regions) × (len(coverageServiceFilters) + len(rdsCoverageEngines)). + wantCalls := len(regions) * (len(coverageServiceFilters) + len(rdsCoverageEngines)) + assert.Equal(t, wantCalls, mock.coverageCalls, "one CE call per (region, service|engine) combo") + + // us-east-1 + db.r6g.large = 50% / avg 1. The mock returns the same + // canned response for every call, so the per-region loop writes the + // same key on every iteration — last write wins. + r6gLarge := got[poolKey("eu-west-2", "db.r6g.large")] + assert.InDelta(t, 50.0, r6gLarge.Pct, 0.001) + assert.InDelta(t, 1.0, r6gLarge.AvgInstancesPerHour, 0.001, "TotalRunningHours / window hours = avg concurrent instances") + m5xLarge := got[poolKey("eu-west-2", "db.m5.xlarge")] + assert.InDelta(t, 33.7, m5xLarge.Pct, 0.001, "SCREAMING_SNAKE_CASE attribute keys tolerated") + assert.InDelta(t, 10.0, m5xLarge.AvgInstancesPerHour, 0.001, "avg parsed regardless of attribute-key casing") + _, hasMissing := got[poolKey("us-east-1", "")] + assert.False(t, hasMissing, "groups missing INSTANCE_TYPE should be skipped, not emit empty keys") +} + +// TestGetRICoverageMap_LookbackDefault confirms that a non-positive lookback +// substitutes the 30-day default (matches GetRIUtilization's behaviour). +func TestGetRICoverageMap_LookbackDefault(t *testing.T) { + mock := &mockCoverageCE{coverageOutput: &costexplorer.GetReservationCoverageOutput{}} + client := NewClientWithAPI(mock, "us-east-1") + + regions := []string{"us-east-1"} + _, err := client.GetRICoverageMap(context.Background(), 0, regions) + require.NoError(t, err) + wantCalls := len(regions) * (len(coverageServiceFilters) + len(rdsCoverageEngines)) + assert.Equal(t, wantCalls, mock.coverageCalls) +} + +// TestApplyCoverageMapToRecommendations covers the org-wide pool matching: +// recs look up by (region, instance_type, [engine, deployment]) so any +// linked account in the org sees the same coverage % for the same pool +// (matches AWS console aggregation). +func TestApplyCoverageMapToRecommendations(t *testing.T) { + recs := []common.Recommendation{ + {Region: "us-east-1", ResourceType: "db.r6g.large", Account: "acct-a"}, + {Region: "eu-west-2", ResourceType: "db.r6g.large", Account: "acct-b"}, + {Region: "us-east-1", ResourceType: "db.m5.large", Account: "acct-a"}, // no match + } + coverage := PoolCoverageMap{ + poolKey("us-east-1", "db.r6g.large"): {Pct: 50.0}, + poolKey("eu-west-2", "db.r6g.large"): {Pct: 33.7}, + } + + ApplyCoverageMapToRecommendations(recs, coverage) + + assert.InDelta(t, 50.0, recs[0].ExistingCoveragePct, 0.001) + assert.True(t, recs[0].ExistingCoverageKnown, "matched pool sets Known=true") + assert.InDelta(t, 33.7, recs[1].ExistingCoveragePct, 0.001) + assert.True(t, recs[1].ExistingCoverageKnown, "matched pool sets Known=true") + assert.Equal(t, 0.0, recs[2].ExistingCoveragePct, "unmatched pool should leave field at zero (no signal)") + assert.False(t, recs[2].ExistingCoverageKnown, "unmatched pool leaves Known=false so CSV renders n/a") + + t.Run("empty map is a no-op", func(t *testing.T) { + recs := []common.Recommendation{ + {Region: "us-east-1", ResourceType: "db.r6g.large", ExistingCoveragePct: 42}, + } + ApplyCoverageMapToRecommendations(recs, nil) + assert.Equal(t, 42.0, recs[0].ExistingCoveragePct, "nil map must leave existing values untouched") + }) + + t.Run("case-insensitive matching for region/instance", func(t *testing.T) { + // Recommendations carry mixed-case region/type strings; the lookup + // must normalise both sides via poolKey. + recs := []common.Recommendation{{Region: "US-EAST-1", ResourceType: "DB.R6G.LARGE"}} + cov := PoolCoverageMap{poolKey("us-east-1", "db.r6g.large"): {Pct: 75.0}} + ApplyCoverageMapToRecommendations(recs, cov) + assert.InDelta(t, 75.0, recs[0].ExistingCoveragePct, 0.001) + }) + + t.Run("all accounts in same pool see the same coverage", func(t *testing.T) { + // Pools are now org-wide: prod and staging running the same + // instance type / engine / deployment in the same region see the + // same coverage % (matches AWS console aggregation across linked + // accounts). Whoever buys the RI covers the org-wide pool. + recs := []common.Recommendation{ + { + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.t4g.medium", + Account: "prod-account", + Details: &common.DatabaseDetails{Engine: "aurora-mysql", AZConfig: "single-az"}, + }, + { + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.t4g.medium", + Account: "staging-account", + Details: &common.DatabaseDetails{Engine: "aurora-mysql", AZConfig: "single-az"}, + }, + } + cov := PoolCoverageMap{ + rdsPoolKey("us-east-1", "db.t4g.medium", "Aurora MySQL", "Single-AZ"): {Pct: 55.0}, + } + ApplyCoverageMapToRecommendations(recs, cov) + assert.InDelta(t, 55.0, recs[0].ExistingCoveragePct, 0.001, "prod sees the org-wide 55% coverage") + assert.InDelta(t, 55.0, recs[1].ExistingCoveragePct, 0.001, "staging sees the same org-wide 55% coverage") + }) + + t.Run("avg-per-pool rebalances to coverage's org-wide signal", func(t *testing.T) { + // Single-rec-per-pool case: rec.avg is replaced with cov.avg via + // scale = cov.avg / rec.avg. Whatever AWS rec API surfaced as the + // per-account avg is replaced with the coverage's org-wide avg so + // downstream sizing buys the right number for the whole pool. + recs := []common.Recommendation{ + // rec[0]: no per-account avg; coverage provides one → recs[0].avg = cov.avg + {Region: "us-east-1", ResourceType: "m5.large"}, + // rec[1]: per-account avg present; coverage's larger avg now overrides + {Region: "us-east-1", ResourceType: "m5.xlarge", AverageInstancesUsedPerHour: 5.0}, + } + cov := PoolCoverageMap{ + poolKey("us-east-1", "m5.large"): {Pct: 50.0, AvgInstancesPerHour: 10.0}, + poolKey("us-east-1", "m5.xlarge"): {Pct: 30.0, AvgInstancesPerHour: 99.0}, + } + ApplyCoverageMapToRecommendations(recs, cov) + assert.InDelta(t, 10.0, recs[0].AverageInstancesUsedPerHour, 0.001, "no-signal rec gets coverage's org-wide avg") + assert.InDelta(t, 99.0, recs[1].AverageInstancesUsedPerHour, 0.001, "single rec in pool: avg replaced with coverage's org-wide avg (scale=99/5)") + }) + + t.Run("avg-per-pool splits proportionally across multiple recs", func(t *testing.T) { + // Two per-account recs in the same pool with avgs 24.4 and 23.2; + // CE coverage reports org-wide avg of 210 for the pool (other + // accounts not surfaced by AWS rec API). Each rec gets scaled by + // 210 / (24.4+23.2) ≈ 4.412 so the per-pool sum hits 210, which + // is what the AWS console coverage CSV would target. + recs := []common.Recommendation{ + { + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.t4g.medium", + Account: "production", + Details: &common.DatabaseDetails{Engine: "aurora-mysql", AZConfig: "single-az"}, + AverageInstancesUsedPerHour: 24.4, + }, + { + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.t4g.medium", + Account: "staging", + Details: &common.DatabaseDetails{Engine: "aurora-mysql", AZConfig: "single-az"}, + AverageInstancesUsedPerHour: 23.2, + }, + } + cov := PoolCoverageMap{ + rdsPoolKey("us-east-1", "db.t4g.medium", "Aurora MySQL", "Single-AZ"): { + Pct: 55.0, AvgInstancesPerHour: 210.0, + }, + } + ApplyCoverageMapToRecommendations(recs, cov) + // Per-rec scaled avgs sum to the coverage's org-wide avg. + sum := recs[0].AverageInstancesUsedPerHour + recs[1].AverageInstancesUsedPerHour + assert.InDelta(t, 210.0, sum, 0.001, "scaled per-rec avgs sum to coverage's org-wide avg") + // Proportions preserved: prod (24.4 / 47.6 = 51.3%) of the org total. + assert.InDelta(t, 210.0*24.4/47.6, recs[0].AverageInstancesUsedPerHour, 0.001, "prod gets its proportional share of org-wide avg") + assert.InDelta(t, 210.0*23.2/47.6, recs[1].AverageInstancesUsedPerHour, 0.001, "staging gets its proportional share of org-wide avg") + }) + + t.Run("avg-per-pool splits evenly when every rec has zero avg", func(t *testing.T) { + // Two recs in same pool, both with zero per-account avg signal. + // No per-rec proportion to preserve; coverage's avg splits evenly. + recs := []common.Recommendation{ + {Region: "us-east-1", ResourceType: "m5.large", Account: "a"}, + {Region: "us-east-1", ResourceType: "m5.large", Account: "b"}, + } + cov := PoolCoverageMap{ + poolKey("us-east-1", "m5.large"): {Pct: 50.0, AvgInstancesPerHour: 10.0}, + } + ApplyCoverageMapToRecommendations(recs, cov) + assert.InDelta(t, 5.0, recs[0].AverageInstancesUsedPerHour, 0.001, "even split across zero-signal recs in same pool") + assert.InDelta(t, 5.0, recs[1].AverageInstancesUsedPerHour, 0.001, "even split across zero-signal recs in same pool") + }) + + t.Run("avg-per-pool leaves rec.avg untouched when coverage has no avg signal", func(t *testing.T) { + // Coverage entry exists (sets ExistingCoveragePct) but has no avg + // signal (e.g., TotalRunningHours absent from CE response). The + // per-account avg is the only signal — leave it alone. + recs := []common.Recommendation{ + {Region: "us-east-1", ResourceType: "m5.large", AverageInstancesUsedPerHour: 7.5}, + } + cov := PoolCoverageMap{ + poolKey("us-east-1", "m5.large"): {Pct: 50.0, AvgInstancesPerHour: 0}, + } + ApplyCoverageMapToRecommendations(recs, cov) + assert.InDelta(t, 50.0, recs[0].ExistingCoveragePct, 0.001) + assert.InDelta(t, 7.5, recs[0].AverageInstancesUsedPerHour, 0.001, "no coverage avg signal → leave rec avg as-is") + }) + + t.Run("RDS recs look up with engine + deployment-aware key", func(t *testing.T) { + // Same region + instance type, different engines and deployments — + // the per-engine fetcher writes one entry per (engine, deployment) + // and the lookup must pick the right one. CE-side ("Aurora MySQL", + // "Single-AZ") and parser-side ("aurora-mysql", "single-az") forms + // collapse to the same key. + recs := []common.Recommendation{ + { + Service: common.ServiceRDS, + Region: "eu-west-2", + ResourceType: "db.r6g.large", + Details: &common.DatabaseDetails{Engine: "aurora-mysql", AZConfig: "single-az"}, + }, + { + Service: common.ServiceRDS, + Region: "eu-west-2", + ResourceType: "db.r6g.large", + Details: &common.DatabaseDetails{Engine: "mysql", AZConfig: "multi-az"}, + }, + { + // Same engine as rec[0] but Multi-AZ — a different pool + // scope (a Single-AZ RI can't cover Multi-AZ demand). + Service: common.ServiceRDS, + Region: "eu-west-2", + ResourceType: "db.r6g.large", + Details: &common.DatabaseDetails{Engine: "aurora-mysql", AZConfig: "multi-az"}, + }, + } + cov := PoolCoverageMap{ + rdsPoolKey("eu-west-2", "db.r6g.large", "Aurora MySQL", "Single-AZ"): {Pct: 98.5}, + rdsPoolKey("eu-west-2", "db.r6g.large", "MySQL", "Multi-AZ"): {Pct: 0.0}, + rdsPoolKey("eu-west-2", "db.r6g.large", "Aurora MySQL", "Multi-AZ"): {Pct: 12.0}, + } + ApplyCoverageMapToRecommendations(recs, cov) + assert.InDelta(t, 98.5, recs[0].ExistingCoveragePct, 0.001, "aurora-mysql single-az rec picks up its own coverage") + assert.Equal(t, 0.0, recs[1].ExistingCoveragePct, "mysql multi-az rec sees only its own coverage") + assert.InDelta(t, 12.0, recs[2].ExistingCoveragePct, 0.001, "aurora-mysql multi-az is a distinct pool from single-az") + }) +} + +// TestNormaliseRDSEngine locks the canonicalisation of CE-side and +// parser-side engine strings to the same lookup key. Without this both +// producer (per-engine fetcher) and consumer (apply helper) would write +// or read differently and miss the lookup entirely. +func TestNormaliseRDSEngine(t *testing.T) { + cases := map[string]string{ + // CE-side strings (human readable, mixed case, spaces). + "Aurora MySQL": "auroramysql", + "Aurora PostgreSQL": "aurorapostgresql", + "MySQL": "mysql", + "PostgreSQL": "postgresql", + "SQL Server": "sqlserver", + // Parser-side strings (lowercase, hyphenated). + "aurora-mysql": "auroramysql", + "aurora-postgresql": "aurorapostgresql", + // "postgres" is the bare RDS engine slug that DescribeDBInstances + // returns; CE coverage emits keys under "postgresql" (the display + // form). Collapse so both sides land on the same lookup key. + "postgres": "postgresql", + "POSTGRES": "postgresql", + "postgresql": "postgresql", + // SQL Server edition slugs (Express / Web / Standard / Enterprise) + // collapse to the bare family; CE coverage emits keys under + // "sqlserver" (from the "SQL Server" display form already covered + // in the CE-side block above). + "sqlserver-ee": "sqlserver", + "sqlserver-se": "sqlserver", + "sqlserver-ex": "sqlserver", + "sqlserver-web": "sqlserver", + // Oracle edition slugs collapse to the bare family too. + "oracle-ee": "oracle", + "oracle-se": "oracle", + "oracle-se1": "oracle", + "oracle-se2": "oracle", + // Edge cases. + "": "", + "MYSQL": "mysql", + } + for in, want := range cases { + t.Run(in, func(t *testing.T) { + assert.Equal(t, want, normaliseRDSEngine(in)) + }) + } +} + +// TestNormaliseDeployment locks deployment-option canonicalisation. CE +// returns "Single-AZ" / "Multi-AZ" / "Multi-AZ (readable standbys)" while +// the parser stores "single-az" / "multi-az"; both forms must collapse to +// the same lookup key. +func TestNormaliseDeployment(t *testing.T) { + cases := map[string]string{ + "Single-AZ": "singleaz", + "single-az": "singleaz", + "Multi-AZ": "multiaz", + "multi-az": "multiaz", + "Multi-AZ (readable standbys)": "multiazreadablestandbys", + "": "", + } + for in, want := range cases { + t.Run(in, func(t *testing.T) { + assert.Equal(t, want, normaliseDeployment(in)) + }) + } +} diff --git a/providers/aws/recommendations/expiry.go b/providers/aws/recommendations/expiry.go new file mode 100644 index 00000000..0975fdd7 --- /dev/null +++ b/providers/aws/recommendations/expiry.go @@ -0,0 +1,116 @@ +package recommendations + +import ( + "time" + + "github.com/LeanerCloud/CUDly/pkg/common" +) + +// AdjustExistingCoverageForExpiringCommitments reduces each rec's +// ExistingCoveragePct by the share of pool demand attributable to RIs that +// expire within windowDays. Sized purchases downstream then treat the +// expiring share as already-uncovered and recommend replacements. +// +// Returns the number of recommendations whose ExistingCoveragePct was +// adjusted, so the caller can log a meaningful summary. +// +// No-op when windowDays <= 0 or commitments is empty. The intent is that +// --rebuy-window-days controls whether this adjustment runs at all; +// callers gate the invocation on cfg.RebuyWindowDays. +// +// Pool matching uses the same engine-aware key as the coverage map so the +// adjustment lines up with the data ApplyCoverageMapToRecommendations +// just populated. Commitments whose pool key doesn't match any rec are +// silently dropped (they're for instance types we're not currently +// recommending; nothing to subtract from). +func AdjustExistingCoverageForExpiringCommitments( + recs []common.Recommendation, + commitments []common.Commitment, + windowDays int, +) int { + if windowDays <= 0 || len(commitments) == 0 { + return 0 + } + cutoff := time.Now().Add(time.Duration(windowDays) * 24 * time.Hour) + expiringByPool := expiringCountsByPool(commitments, cutoff) + if len(expiringByPool) == 0 { + return 0 + } + return applyExpiringAdjustments(recs, expiringByPool) +} + +// expiringCountsByPool aggregates active commitments expiring at-or-before +// cutoff into a pool-keyed count map. The State filter mirrors +// DuplicateChecker.filterRecentCommitments — only RIs currently providing +// coverage are counted (queued / retired RIs aren't covering demand now). +func expiringCountsByPool(commitments []common.Commitment, cutoff time.Time) map[string]int { + out := make(map[string]int) + for _, c := range commitments { + if !commitmentIsActive(c) { + continue + } + if c.EndDate.IsZero() || c.EndDate.After(cutoff) { + continue + } + key := commitmentPoolKey(c) + if key == "" { + continue + } + out[key] += c.Count + } + return out +} + +// applyExpiringAdjustments subtracts each rec's matching expiring-pool +// share from ExistingCoveragePct, clamping at zero. Returns the number +// of recs touched. Recs without a positive avg signal are skipped — we +// can't compute a per-pool percentage without it. +func applyExpiringAdjustments(recs []common.Recommendation, expiringByPool map[string]int) int { + adjusted := 0 + for i := range recs { + if recs[i].AverageInstancesUsedPerHour <= 0 { + continue + } + expCount, ok := expiringByPool[lookupPoolKey(recs[i])] + if !ok || expCount == 0 { + continue + } + // Convert expiring count to percentage of pool demand. Clamp to + // avoid negative ExistingCoveragePct if the expiring share exceeds + // the CE-reported existing coverage (can happen when CE coverage + // is org-wide-averaged below per-pool truth, or when expiring + // count includes ZONAL RIs the regional aggregate doesn't credit). + expiringPct := float64(expCount) / recs[i].AverageInstancesUsedPerHour * 100.0 + if expiringPct > recs[i].ExistingCoveragePct { + recs[i].ExistingCoveragePct = 0 + } else { + recs[i].ExistingCoveragePct -= expiringPct + } + adjusted++ + } + return adjusted +} + +// commitmentIsActive returns true for commitments whose State indicates +// they're currently providing coverage. Matches the state set used by +// DuplicateChecker for consistency. +func commitmentIsActive(c common.Commitment) bool { + return c.State == "active" || c.State == "payment-pending" +} + +// commitmentPoolKey returns the same lookup key shape used by the coverage +// map: engine + deployment-aware for RDS, region+instance-type for +// everything else. Keys are org-wide (no account dimension) so an +// expiring RI in one linked account adjusts the org-wide coverage signal +// for the pool it belongs to — matching the way the coverage map itself +// is now aggregated. The Deployment field on common.Commitment carries +// the same "single-az"/"multi-az" vocabulary as DatabaseDetails.AZConfig +// on Recommendation, so a Multi-AZ commitment's expiry adjusts only the +// Multi-AZ pool's existing-coverage signal (a Single-AZ RI cannot cover +// Multi-AZ demand and vice versa). +func commitmentPoolKey(c common.Commitment) string { + if c.Service == common.ServiceRDS || c.Service == common.ServiceRelationalDB { + return rdsPoolKey(c.Region, c.ResourceType, c.Engine, c.Deployment) + } + return poolKey(c.Region, c.ResourceType) +} diff --git a/providers/aws/recommendations/expiry_test.go b/providers/aws/recommendations/expiry_test.go new file mode 100644 index 00000000..ad791d7f --- /dev/null +++ b/providers/aws/recommendations/expiry_test.go @@ -0,0 +1,251 @@ +package recommendations + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/LeanerCloud/CUDly/pkg/common" +) + +// TestAdjustExistingCoverageForExpiringCommitments covers the four cases: +// the no-op guards, the typical case (some commitments expiring, some not), +// the over-subtract clamp, and pool-key mismatch (commitment for a pool +// nobody recommended). +func TestAdjustExistingCoverageForExpiringCommitments(t *testing.T) { + now := time.Now() + soon := now.Add(15 * 24 * time.Hour) // expires in 15 days + later := now.Add(180 * 24 * time.Hour) // expires in 180 days + pastDate := now.Add(-7 * 24 * time.Hour) // already expired + pgEngine := "Aurora PostgreSQL" + + t.Run("no-op when windowDays is zero", func(t *testing.T) { + recs := []common.Recommendation{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + AverageInstancesUsedPerHour: 10, + ExistingCoveragePct: 80, + Details: &common.DatabaseDetails{Engine: pgEngine}, + }} + commits := []common.Commitment{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + Engine: pgEngine, + Count: 5, + State: "active", + EndDate: soon, + }} + n := AdjustExistingCoverageForExpiringCommitments(recs, commits, 0) + assert.Equal(t, 0, n) + assert.Equal(t, 80.0, recs[0].ExistingCoveragePct, "ExistingCoveragePct must be untouched when windowDays=0") + }) + + t.Run("typical case: subtracts expiring share within window", func(t *testing.T) { + // avg=10, existing=80%, 5 of those covered by RIs expiring in 15 days. + // expiringPct = 5/10*100 = 50. New existing = 80 - 50 = 30. + recs := []common.Recommendation{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + AverageInstancesUsedPerHour: 10, + ExistingCoveragePct: 80, + Details: &common.DatabaseDetails{Engine: pgEngine}, + }} + commits := []common.Commitment{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + Engine: pgEngine, + Count: 5, + State: "active", + EndDate: soon, + }} + n := AdjustExistingCoverageForExpiringCommitments(recs, commits, 30) + assert.Equal(t, 1, n, "one rec adjusted") + assert.InDelta(t, 30.0, recs[0].ExistingCoveragePct, 0.001) + }) + + t.Run("deployment-aware matching: Single-AZ commit only adjusts Single-AZ rec", func(t *testing.T) { + // Two recs for the same (region, type, engine) but different + // deployments. An expiring Single-AZ commitment must only affect + // the Single-AZ rec's existing-coverage signal — a Single-AZ RI + // cannot cover Multi-AZ demand. Without Deployment threaded into + // commitmentPoolKey, the commitment's empty-deployment key would + // miss both recs (both have deployment-aware lookup keys). + recs := []common.Recommendation{ + { + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + AverageInstancesUsedPerHour: 10, + ExistingCoveragePct: 80, + Details: &common.DatabaseDetails{Engine: pgEngine, AZConfig: "single-az"}, + }, + { + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + AverageInstancesUsedPerHour: 10, + ExistingCoveragePct: 80, + Details: &common.DatabaseDetails{Engine: pgEngine, AZConfig: "multi-az"}, + }, + } + commits := []common.Commitment{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + Engine: pgEngine, + Deployment: "single-az", + Count: 5, + State: "active", + EndDate: soon, + }} + n := AdjustExistingCoverageForExpiringCommitments(recs, commits, 30) + assert.Equal(t, 1, n, "only the Single-AZ rec adjusted") + assert.InDelta(t, 30.0, recs[0].ExistingCoveragePct, 0.001, "Single-AZ rec: 80% - 50% expiring = 30%") + assert.Equal(t, 80.0, recs[1].ExistingCoveragePct, "Multi-AZ rec untouched (different deployment pool)") + }) + + t.Run("commitments expiring outside window are ignored", func(t *testing.T) { + recs := []common.Recommendation{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + AverageInstancesUsedPerHour: 10, + ExistingCoveragePct: 80, + Details: &common.DatabaseDetails{Engine: pgEngine}, + }} + commits := []common.Commitment{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + Engine: pgEngine, + Count: 5, + State: "active", + EndDate: later, // outside window + }} + n := AdjustExistingCoverageForExpiringCommitments(recs, commits, 30) + assert.Equal(t, 0, n) + assert.Equal(t, 80.0, recs[0].ExistingCoveragePct) + }) + + t.Run("over-subtract clamps to zero", func(t *testing.T) { + // Expiring count exceeds the share that ExistingCoveragePct claims — + // can happen when CE coverage is averaged across accounts but the + // commitments list is for the full region. Clamp to 0 rather than + // going negative. + recs := []common.Recommendation{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + AverageInstancesUsedPerHour: 10, + ExistingCoveragePct: 20, // only 20% per CE + Details: &common.DatabaseDetails{Engine: pgEngine}, + }} + commits := []common.Commitment{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + Engine: pgEngine, + Count: 5, // 5/10 = 50% expiring + State: "active", + EndDate: soon, + }} + n := AdjustExistingCoverageForExpiringCommitments(recs, commits, 30) + assert.Equal(t, 1, n) + assert.Equal(t, 0.0, recs[0].ExistingCoveragePct, "must clamp to 0, not go negative") + }) + + t.Run("non-matching pool key is skipped", func(t *testing.T) { + recs := []common.Recommendation{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + AverageInstancesUsedPerHour: 10, + ExistingCoveragePct: 80, + Details: &common.DatabaseDetails{Engine: pgEngine}, + }} + commits := []common.Commitment{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + Engine: "MySQL", // different engine + Count: 5, + State: "active", + EndDate: soon, + }} + n := AdjustExistingCoverageForExpiringCommitments(recs, commits, 30) + assert.Equal(t, 0, n, "engine mismatch should skip") + assert.Equal(t, 80.0, recs[0].ExistingCoveragePct) + }) + + t.Run("inactive commitments are skipped", func(t *testing.T) { + recs := []common.Recommendation{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + AverageInstancesUsedPerHour: 10, + ExistingCoveragePct: 80, + Details: &common.DatabaseDetails{Engine: pgEngine}, + }} + commits := []common.Commitment{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + Engine: pgEngine, + Count: 5, + State: "retired", // not active + EndDate: soon, + }} + n := AdjustExistingCoverageForExpiringCommitments(recs, commits, 30) + assert.Equal(t, 0, n, "retired commitments don't currently provide coverage; skip") + assert.Equal(t, 80.0, recs[0].ExistingCoveragePct) + }) + + t.Run("active commits with EndDate before cutoff are counted (incl. past dates)", func(t *testing.T) { + // The window check is EndDate <= cutoff: past dates satisfy that + // too. State="active" + EndDate now" as an additional guard and silently change the + // semantics of pastDate commitments. + recs := []common.Recommendation{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + AverageInstancesUsedPerHour: 10, + ExistingCoveragePct: 80, + Details: &common.DatabaseDetails{Engine: pgEngine}, + }} + commits := []common.Commitment{{ + Service: common.ServiceRDS, + Region: "us-east-1", + ResourceType: "db.r6g.large", + Engine: pgEngine, + Count: 5, + State: "active", + EndDate: pastDate, + }} + n := AdjustExistingCoverageForExpiringCommitments(recs, commits, 30) + assert.Equal(t, 1, n, "EndDate=pastDate <= cutoff so the commit IS counted") + assert.InDelta(t, 30.0, recs[0].ExistingCoveragePct, 0.001) + }) + + t.Run("no-op when commitments is empty", func(t *testing.T) { + recs := []common.Recommendation{{ + Service: common.ServiceRDS, + AverageInstancesUsedPerHour: 10, + ExistingCoveragePct: 50, + }} + n := AdjustExistingCoverageForExpiringCommitments(recs, nil, 30) + assert.Equal(t, 0, n) + assert.Equal(t, 50.0, recs[0].ExistingCoveragePct) + }) +} diff --git a/providers/aws/recommendations/family_nu.go b/providers/aws/recommendations/family_nu.go new file mode 100644 index 00000000..475945f8 --- /dev/null +++ b/providers/aws/recommendations/family_nu.go @@ -0,0 +1,319 @@ +package recommendations + +import ( + "math" + "strings" + + "github.com/LeanerCloud/CUDly/pkg/common" +) + +// rdsInstanceNU maps an RDS instance size suffix to the normalized-units +// value AWS uses for RDS RI size-flexibility within an instance family. +// Reference: +// https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_WorkingWithReservedDBInstances.html#USER_WorkingWithReservedDBInstances.SizeFlexible +// +// Used by ApplyFamilyNUSizingRDS to translate AWS-rec-API's family-NU- +// bundled buy recommendations into the family target NU need under +// --target-coverage. Sizes not in this map evaluate to 0 NU, which +// causes the family-NU step to leave the rec unchanged (the per-pool +// sizing path still handles it downstream). +var rdsInstanceNU = map[string]float64{ + "nano": 0.25, + "micro": 0.5, + "small": 1, + "medium": 2, + "large": 4, + "xlarge": 8, + "2xlarge": 16, + "4xlarge": 32, + "8xlarge": 64, + "10xlarge": 80, + "12xlarge": 96, + "16xlarge": 128, + "24xlarge": 192, + "32xlarge": 256, +} + +// RDSInstanceNUFromType is the exported counterpart of rdsInstanceNUFromType +// for callers outside this package that need the NU value for an RDS +// instance type (e.g. CSV writers displaying per-row family-NU +// contribution). Returns 0 for unknown sizes — same fallback semantics +// as the unexported helper. +func RDSInstanceNUFromType(instanceType string) float64 { + return rdsInstanceNUFromType(instanceType) +} + +// RDSFamilyFromType is the exported counterpart of rdsFamilyFromType for +// callers outside this package that need the family prefix of an RDS +// instance type (e.g. CSV writers grouping rows by family). Empty +// string when the type doesn't carry a recognisable size suffix. +func RDSFamilyFromType(instanceType string) string { + return rdsFamilyFromType(instanceType) +} + +// rdsInstanceNUFromType returns the NU value for an instance type like +// "db.r7g.2xlarge", parsing out the size suffix ("2xlarge" → 16). Returns +// 0 when the size isn't recognised — callers treat that as "no family-NU +// signal" and fall back to per-pool sizing. +func rdsInstanceNUFromType(instanceType string) float64 { + parts := strings.Split(instanceType, ".") + if len(parts) < 3 { + return 0 + } + return rdsInstanceNU[parts[len(parts)-1]] +} + +// rdsFamilyFromType returns the family prefix for an instance type, e.g. +// "db.r7g.2xlarge" → "db.r7g". Empty string when the type doesn't carry +// a recognisable size suffix. +func rdsFamilyFromType(instanceType string) string { + parts := strings.Split(instanceType, ".") + if len(parts) < 3 { + return "" + } + return strings.Join(parts[:len(parts)-1], ".") +} + +// rdsFamilyKey returns the lookup key for an RDS family's aggregated +// coverage view. Matches the shape of rdsPoolKey but at family-level +// granularity ("db.r7g" rather than "db.r7g.large"), so the family-NU +// step groups all sizes within (region, family, engine, deployment) into +// a single demand bucket. +func rdsFamilyKey(region, family, engine, deployment string) string { + return strings.ToLower(region) + ":" + + strings.ToLower(family) + ":" + + normaliseRDSEngine(engine) + ":" + + normaliseDeployment(deployment) +} + +// FamilyCoverage is the family-NU-aggregated view of coverage for an +// RDS instance family. TotalNU is the sum of (avg × NU(size)) across +// every size in the family that has CE coverage data; CoveredNU is the +// sum of (avg × NU(size) × pct/100). Pct can be derived as +// CoveredNU / TotalNU × 100 (callers do this inline). +type FamilyCoverage struct { + TotalNU float64 + CoveredNU float64 +} + +// AggregateRDSFamilyCoverage walks the per-pool coverage map and returns +// a family-NU-aggregated view keyed by rdsFamilyKey. Used by family-NU +// sizing to size buys against the total family demand rather than +// per-size pool demand — matching how AWS's +// GetReservationPurchaseRecommendation bundles size-flex demand into a +// single rec at one size. +// +// Skips non-RDS entries (their keys are 2-part, not 4-part) and entries +// whose instance size isn't in rdsInstanceNU. +func AggregateRDSFamilyCoverage(coverage PoolCoverageMap) map[string]FamilyCoverage { + out := make(map[string]FamilyCoverage) + for key, pc := range coverage { + parts := strings.Split(key, ":") + if len(parts) != 4 { + // Non-RDS pool keys are "region:instance_type" (2 parts). + continue + } + region, instType, engine, deployment := parts[0], parts[1], parts[2], parts[3] + nu := rdsInstanceNUFromType(instType) + if nu == 0 { + continue + } + family := rdsFamilyFromType(instType) + if family == "" { + continue + } + fk := rdsFamilyKey(region, family, engine, deployment) + cur := out[fk] + cur.TotalNU += pc.AvgInstancesPerHour * nu + cur.CoveredNU += pc.AvgInstancesPerHour * nu * pc.Pct / 100.0 + out[fk] = cur + } + return out +} + +// ApplyFamilyNUSizingRDS replaces per-pool RI sizing with family-NU +// sizing for RDS recommendations. AWS's GetReservationPurchaseRecommendation +// already bundles size-flex demand within an instance family into a +// single recommendation at one size; per-pool sizing under-buys because +// it only sees that size's pool demand. Family-NU sizing rescales each +// RDS rec's Count so the family-wide NU sum matches the user's target +// across the whole family. +// +// Algorithm: +// 1. Group RDS recs by (region, family, engine, deployment). +// 2. For each family: +// a. Compute family existing_pct = covered_NU / total_NU * 100 +// b. gap = targetPct − existing_pct (drop family if ≤ 0) +// c. target_NU_need = gap / 100 * total_NU +// d. current_rec_NU = Σ rec.Count × NU(rec.size) +// e. scale = target_NU_need / current_rec_NU +// f. Apply scale to each rec.Count and cost-bearing fields +// 3. Recs scaled to zero are dropped (size flex left no room). +// 4. Non-RDS recs are returned unchanged so callers can continue them +// through the per-pool sizing path. +// +// Returns (sizedRDS, nonRDS). When targetPct is outside (0,100] or +// coverage has no family-NU signal for an RDS rec's family, the rec is +// passed through unchanged in sizedRDS (so per-pool sizing downstream +// doesn't re-process it — caller treats sizedRDS as already sized). +func ApplyFamilyNUSizingRDS( + recs []common.Recommendation, + coverage PoolCoverageMap, + targetPct float64, +) (sizedRDS, nonRDS []common.Recommendation) { + if targetPct <= 0 || targetPct > 100 { + return nil, recs + } + familyCov := AggregateRDSFamilyCoverage(coverage) + familyIdx, nonRDS := partitionRDSRecsByFamily(recs) + for fk, indices := range familyIdx { + sized := sizeRDSFamilyRecs(recs, indices, familyCov[fk], targetPct) + sizedRDS = append(sizedRDS, sized...) + } + return sizedRDS, nonRDS +} + +// partitionRDSRecsByFamily splits recs into (a) an index map keyed by +// rdsFamilyKey that groups RDS RI recs by their (region, family, engine, +// deployment), and (b) a slice of non-RDS recs that flow through to the +// caller's per-pool sizing path unchanged. Recs that are RDS RIs but +// carry an instance type without a recognisable size suffix (and thus +// can't be NU-scaled) fall into nonRDS so per-pool sizing still handles +// them. +func partitionRDSRecsByFamily(recs []common.Recommendation) (map[string][]int, []common.Recommendation) { + familyIdx := make(map[string][]int) + nonRDS := make([]common.Recommendation, 0) + for i := range recs { + if !isRDSRIRec(recs[i]) { + nonRDS = append(nonRDS, recs[i]) + continue + } + family := rdsFamilyFromType(recs[i].ResourceType) + if family == "" { + nonRDS = append(nonRDS, recs[i]) + continue + } + engine, deployment := rdsEngineDeploymentFromRec(recs[i]) + fk := rdsFamilyKey(recs[i].Region, family, engine, deployment) + familyIdx[fk] = append(familyIdx[fk], i) + } + return familyIdx, nonRDS +} + +// sizeRDSFamilyRecs sizes the recs in one family-key group: returns the +// sized recs, drops empty/over-target families, and returns the +// unchanged AWS-recommended counts when there's no coverage signal. +// +// First pass scales each rec's Count and cost-bearing fields by the +// family-wide scale factor; second pass sets the same family-level +// ProjectedCoverage on every surviving rec so operators see the +// cumulative family projection (existing% + sum-of-new-NU / totalNU) +// rather than each rec's standalone contribution. Without the second +// pass, a family with N recs would show N different ProjectedCoverage +// values, each missing the other N-1 recs' contributions. +func sizeRDSFamilyRecs( + recs []common.Recommendation, + indices []int, + family FamilyCoverage, + targetPct float64, +) []common.Recommendation { + if family.TotalNU <= 0 { + // No coverage signal — keep AWS-recommended counts as-is. + out := make([]common.Recommendation, 0, len(indices)) + for _, i := range indices { + out = append(out, recs[i]) + } + return out + } + existingPct := family.CoveredNU / family.TotalNU * 100.0 + gap := targetPct - existingPct + if gap <= 0 { + // Family already at-or-above target — drop the whole family's recs. + return nil + } + targetNU := gap / 100.0 * family.TotalNU + currentNU := 0.0 + for _, i := range indices { + currentNU += float64(recs[i].Count) * rdsInstanceNUFromType(recs[i].ResourceType) + } + if currentNU <= 0 { + // Recs sum to zero NU — nothing to scale. + return nil + } + scale := targetNU / currentNU + sized, totalNewNU := scaleFamilyRecs(recs, indices, scale) + annotateFamilyProjection(sized, existingPct, totalNewNU, family.TotalNU) + return sized +} + +// scaleFamilyRecs is the first pass of sizeRDSFamilyRecs: applies the +// family-wide scale factor to each rec's count and cost-bearing fields, +// returning the surviving recs (newCount > 0) and the cumulative +// post-scaling NU across them. +func scaleFamilyRecs(recs []common.Recommendation, indices []int, scale float64) ([]common.Recommendation, float64) { + sized := make([]common.Recommendation, 0, len(indices)) + totalNewNU := 0.0 + for _, i := range indices { + rec, kept := scaleRDSRecInFamily(recs[i], scale) + if !kept { + continue + } + sized = append(sized, rec) + totalNewNU += float64(rec.Count) * rdsInstanceNUFromType(rec.ResourceType) + } + return sized, totalNewNU +} + +// annotateFamilyProjection is the second pass: computes the cumulative +// family ProjectedCoverage (existing% plus totalNewNU / familyTotalNU +// expressed as %) and writes the same value onto every rec along with +// the matching ExistingCoveragePct and per-rec ProjectedUtilization. The +// same projection lands on every row so each one reflects "where the +// family lands" rather than its own slice. +func annotateFamilyProjection(sized []common.Recommendation, existingPct, totalNewNU, familyTotalNU float64) { + familyProj := existingPct + totalNewNU/familyTotalNU*100.0 + if familyProj > 100 { + familyProj = 100 + } + for i := range sized { + sized[i].ExistingCoveragePct = existingPct + sized[i].ExistingCoverageKnown = true + sized[i].ProjectedCoverage = familyProj + if sized[i].AverageInstancesUsedPerHour > 0 { + util := sized[i].AverageInstancesUsedPerHour / float64(sized[i].Count) * 100.0 + if util > 100 { + util = 100 + } + sized[i].ProjectedUtilization = util + } + } +} + +// scaleRDSRecInFamily applies the family-wide scale factor to one rec: +// computes newCount = floor(oldCount × scale) and scales cost-bearing +// fields by the same ratio. Returns (rec, false) when newCount is zero +// so the caller drops the rec (family target left no room for it). +// Projection / utilization metrics are NOT set here — those need the +// family-wide cumulative NU which only the caller knows after sizing +// every rec in the family. +func scaleRDSRecInFamily(rec common.Recommendation, scale float64) (common.Recommendation, bool) { + oldCount := rec.Count + newCount := int(math.Floor(float64(oldCount) * scale)) + if newCount <= 0 { + return rec, false + } + ratio := float64(newCount) / float64(oldCount) + rec = common.ScaleRecommendationCosts(rec, ratio) + rec.Count = newCount + return rec, true +} + +// isRDSRIRec reports whether rec is an RDS-family RI recommendation that +// the family-NU pass should size. Excludes Savings Plans and any other +// commitment type so they reach the per-pool sizing path unchanged. +func isRDSRIRec(rec common.Recommendation) bool { + if rec.CommitmentType != common.CommitmentReservedInstance { + return false + } + return rec.Service == common.ServiceRDS || rec.Service == common.ServiceRelationalDB +} diff --git a/providers/aws/recommendations/family_nu_test.go b/providers/aws/recommendations/family_nu_test.go new file mode 100644 index 00000000..e2474069 --- /dev/null +++ b/providers/aws/recommendations/family_nu_test.go @@ -0,0 +1,389 @@ +package recommendations + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/LeanerCloud/CUDly/pkg/common" +) + +// TestRdsInstanceNUFromType covers the size→NU mapping for representative +// sizes plus the unknown-size fallback (returns 0 so callers know to skip +// family-NU sizing for that rec). +func TestRdsInstanceNUFromType(t *testing.T) { + cases := map[string]float64{ + "db.r7g.large": 4, + "db.r6g.2xlarge": 16, + "db.t4g.medium": 2, + "db.t4g.micro": 0.5, + "db.r6g.16xlarge": 128, + "db.r6g.24xlarge": 192, + "db.r6g.unknown": 0, // fallback + "not-an-rds-type": 0, // too few dots + "": 0, + } + for in, want := range cases { + t.Run(in, func(t *testing.T) { + assert.Equal(t, want, rdsInstanceNUFromType(in)) + }) + } +} + +// TestRdsFamilyFromType locks the family-prefix extraction so all sizes +// in the same family share a lookup key. +func TestRdsFamilyFromType(t *testing.T) { + cases := map[string]string{ + "db.r7g.large": "db.r7g", + "db.r7g.2xlarge": "db.r7g", + "db.t4g.medium": "db.t4g", + "db.m5.xlarge": "db.m5", + "db.m6gd.xlarge": "db.m6gd", + "": "", + "db.r7g": "", // missing size suffix + } + for in, want := range cases { + t.Run(in, func(t *testing.T) { + assert.Equal(t, want, rdsFamilyFromType(in)) + }) + } +} + +// TestAggregateRDSFamilyCoverage confirms per-pool coverage entries roll +// up to family-NU correctly: sum-of-(avg × size_NU) for TotalNU and +// sum-of-(avg × size_NU × pct/100) for CoveredNU. Mirrors the manual +// family-level table that proved size flex was implicit in AWS rec API. +func TestAggregateRDSFamilyCoverage(t *testing.T) { + // Family: us-east-1 / db.r7g / Aurora MySQL / Single-AZ + // Two sizes contributing: + // db.r7g.large avg=40.6 cov=58% → 162.4 NU running, 94.2 NU covered + // db.r7g.xlarge avg= 3.9 cov= 0% → 31.2 NU running, 0 NU covered + // Family totals: 193.6 NU running, 94.2 NU covered. + cov := PoolCoverageMap{ + rdsPoolKey("us-east-1", "db.r7g.large", "Aurora MySQL", "Single-AZ"): { + Pct: 58.0, AvgInstancesPerHour: 40.6, + }, + rdsPoolKey("us-east-1", "db.r7g.xlarge", "Aurora MySQL", "Single-AZ"): { + Pct: 0.0, AvgInstancesPerHour: 3.9, + }, + // Different family in same region — should NOT roll into the r7g key. + rdsPoolKey("us-east-1", "db.t4g.medium", "Aurora MySQL", "Single-AZ"): { + Pct: 50.0, AvgInstancesPerHour: 100.0, + }, + // Same instance, different deployment — distinct family bucket. + rdsPoolKey("us-east-1", "db.r7g.large", "Aurora MySQL", "Multi-AZ"): { + Pct: 0.0, AvgInstancesPerHour: 5.0, + }, + // Non-RDS pool key (2-part) — should be skipped entirely. + poolKey("us-east-1", "m5.large"): {Pct: 50.0, AvgInstancesPerHour: 10.0}, + } + agg := AggregateRDSFamilyCoverage(cov) + + fk := rdsFamilyKey("us-east-1", "db.r7g", "Aurora MySQL", "Single-AZ") + r7g := agg[fk] + assert.InDelta(t, 40.6*4+3.9*8, r7g.TotalNU, 0.01, "TotalNU = sum(avg × NU(size))") + assert.InDelta(t, 40.6*4*0.58+3.9*8*0.0, r7g.CoveredNU, 0.01, "CoveredNU = sum(avg × NU(size) × pct/100)") + + // Different deployment — separate bucket. + multi := agg[rdsFamilyKey("us-east-1", "db.r7g", "Aurora MySQL", "Multi-AZ")] + assert.InDelta(t, 5.0*4, multi.TotalNU, 0.01, "Multi-AZ keeps a separate family bucket") + + // Non-RDS key skipped. + assert.NotContains(t, agg, "us-east-1:m5.large", "non-RDS pool keys should be ignored") +} + +// TestApplyFamilyNUSizingRDS exercises the four important cases: +// 1. AWS rec at-target NU → scale = 1, no change +// 2. AWS rec under-recommends → scale > 1, counts grow +// 3. AWS rec over-recommends → scale < 1, counts shrink +// 4. Family already at target → all recs dropped +func TestApplyFamilyNUSizingRDS(t *testing.T) { + t.Run("AWS rec NU matches target → counts unchanged", func(t *testing.T) { + // Family db.r7g Aurora MySQL us-east-1: + // TotalNU = 40.6*4 + 3.9*8 = 193.6; CoveredNU = 162.4*0.58 = 94.2 + // existing_pct = 48.66; gap = 80 − 48.66 = 31.34 + // target_NU = 31.34/100 × 193.6 ≈ 60.7 + // AWS rec: 15 × db.r7g.large = 60 NU → scale ≈ 1.01 → floor(15.1) = 15 + cov := PoolCoverageMap{ + rdsPoolKey("us-east-1", "db.r7g.large", "Aurora MySQL", "Single-AZ"): { + Pct: 58.0, AvgInstancesPerHour: 40.6, + }, + rdsPoolKey("us-east-1", "db.r7g.xlarge", "Aurora MySQL", "Single-AZ"): { + Pct: 0.0, AvgInstancesPerHour: 3.9, + }, + } + recs := []common.Recommendation{ + { + Service: common.ServiceRDS, + CommitmentType: common.CommitmentReservedInstance, + Region: "us-east-1", + ResourceType: "db.r7g.large", + Count: 15, + CommitmentCost: 1500, OnDemandCost: 3000, EstimatedSavings: 600, + Details: &common.DatabaseDetails{Engine: "aurora-mysql", AZConfig: "single-az"}, + }, + } + sized, nonRDS := ApplyFamilyNUSizingRDS(recs, cov, 80) + require.Len(t, sized, 1, "RDS rec kept (target NU > 0)") + assert.Empty(t, nonRDS, "no non-RDS recs in this fixture") + assert.Equal(t, 15, sized[0].Count, "AWS-rec NU already matches target → count preserved") + // Costs unchanged (ratio = 1) + assert.InDelta(t, 1500.0, sized[0].CommitmentCost, 0.01) + }) + + t.Run("RecurringMonthlyCost scales with count when populated", func(t *testing.T) { + // Partial-upfront RIs carry a per-month fee in addition to upfront. + // Family-NU sizing must scale this fee by newCount/oldCount so the + // returned rec's monthly cost reflects what the user actually buys + // (not AWS's original recommendation count). nil input stays nil. + monthly := 100.0 + cov := PoolCoverageMap{ + rdsPoolKey("eu-west-2", "db.r7g.large", "MySQL", "Multi-AZ"): { + Pct: 0.0, AvgInstancesPerHour: 10, + }, + } + recs := []common.Recommendation{ + { + Service: common.ServiceRDS, + CommitmentType: common.CommitmentReservedInstance, + Region: "eu-west-2", + ResourceType: "db.r7g.large", + Count: 5, + CommitmentCost: 5000, + RecurringMonthlyCost: &monthly, + Details: &common.DatabaseDetails{Engine: "mysql", AZConfig: "multi-az"}, + }, + } + sized, _ := ApplyFamilyNUSizingRDS(recs, cov, 80) + require.Len(t, sized, 1) + // Scale 32/20 = 1.6 → newCount = 8 → monthly = 100 × 8/5 = 160 + assert.Equal(t, 8, sized[0].Count) + require.NotNil(t, sized[0].RecurringMonthlyCost) + assert.InDelta(t, 160.0, *sized[0].RecurringMonthlyCost, 0.001, "monthly fee scales by 8/5 alongside other costs") + assert.Equal(t, 100.0, monthly, "original target should not be mutated (new pointer)") + }) + + t.Run("AWS rec under-recommends → counts scale up", func(t *testing.T) { + // Family eu-west-2 / db.r7g / MySQL / Multi-AZ: + // Only db.r7g.large size in this fixture, avg=10, cov=0%. + // TotalNU = 10*4 = 40; CoveredNU = 0; existing=0; gap=80 → target_NU = 32 + // AWS rec: 5 × db.r7g.large = 20 NU → scale = 32/20 = 1.6 → floor(8.0) = 8 + cov := PoolCoverageMap{ + rdsPoolKey("eu-west-2", "db.r7g.large", "MySQL", "Multi-AZ"): { + Pct: 0.0, AvgInstancesPerHour: 10, + }, + } + recs := []common.Recommendation{ + { + Service: common.ServiceRDS, + CommitmentType: common.CommitmentReservedInstance, + Region: "eu-west-2", + ResourceType: "db.r7g.large", + Count: 5, + CommitmentCost: 5000, OnDemandCost: 10000, EstimatedSavings: 2000, + AverageInstancesUsedPerHour: 10, + Details: &common.DatabaseDetails{Engine: "mysql", AZConfig: "multi-az"}, + }, + } + sized, _ := ApplyFamilyNUSizingRDS(recs, cov, 80) + require.Len(t, sized, 1) + assert.Equal(t, 8, sized[0].Count, "scale 32/20 = 1.6 × 5 = 8 RIs to deliver 32 NU at 80% target") + assert.InDelta(t, 5000*8.0/5.0, sized[0].CommitmentCost, 0.01, "CommitmentCost scales by 8/5") + }) + + t.Run("AWS rec over-recommends → counts scale down", func(t *testing.T) { + // Family with low demand; AWS rec proposes more NU than 80% target needs. + // TotalNU = 10*4 = 40; existing=0; target=80 → target_NU = 32 + // AWS rec: 20 × .large = 80 NU → scale = 32/80 = 0.4 → floor(8.0) = 8 + cov := PoolCoverageMap{ + rdsPoolKey("eu-west-2", "db.r7g.large", "MySQL", "Multi-AZ"): { + Pct: 0.0, AvgInstancesPerHour: 10, + }, + } + recs := []common.Recommendation{ + { + Service: common.ServiceRDS, + CommitmentType: common.CommitmentReservedInstance, + Region: "eu-west-2", + ResourceType: "db.r7g.large", + Count: 20, + CommitmentCost: 20000, + Details: &common.DatabaseDetails{Engine: "mysql", AZConfig: "multi-az"}, + }, + } + sized, _ := ApplyFamilyNUSizingRDS(recs, cov, 80) + require.Len(t, sized, 1) + assert.Equal(t, 8, sized[0].Count, "AWS over-proposed; scale down to family target") + }) + + t.Run("family at-or-above target → all recs dropped", func(t *testing.T) { + // existing_pct = 90% > target 80% → drop. + cov := PoolCoverageMap{ + rdsPoolKey("us-east-1", "db.r7g.large", "Aurora MySQL", "Single-AZ"): { + Pct: 90.0, AvgInstancesPerHour: 10, + }, + } + recs := []common.Recommendation{ + { + Service: common.ServiceRDS, + CommitmentType: common.CommitmentReservedInstance, + Region: "us-east-1", + ResourceType: "db.r7g.large", + Count: 5, + Details: &common.DatabaseDetails{Engine: "aurora-mysql", AZConfig: "single-az"}, + }, + } + sized, _ := ApplyFamilyNUSizingRDS(recs, cov, 80) + assert.Empty(t, sized, "family already covered → drop all recs") + }) + + t.Run("no coverage signal → recs pass through unchanged", func(t *testing.T) { + // Empty coverage map → rec.Count untouched, rec returned in sizedRDS + // (caller treats as already-sized to avoid double-sizing). + recs := []common.Recommendation{ + { + Service: common.ServiceRDS, + CommitmentType: common.CommitmentReservedInstance, + Region: "ap-east-1", + ResourceType: "db.r7g.large", + Count: 5, + Details: &common.DatabaseDetails{Engine: "aurora-mysql", AZConfig: "single-az"}, + }, + } + sized, _ := ApplyFamilyNUSizingRDS(recs, PoolCoverageMap{}, 80) + require.Len(t, sized, 1, "rec preserved as-is when no family-NU signal") + assert.Equal(t, 5, sized[0].Count) + }) + + t.Run("non-RDS recs flow through nonRDS partition", func(t *testing.T) { + // EC2 and SP recs should NOT be touched by family-NU; they appear + // in the nonRDS slice for per-pool sizing. + recs := []common.Recommendation{ + {Service: common.ServiceEC2, CommitmentType: common.CommitmentReservedInstance, Count: 3}, + {Service: common.ServiceSavingsPlans, CommitmentType: common.CommitmentSavingsPlan, Count: 1}, + { + Service: common.ServiceRDS, + CommitmentType: common.CommitmentReservedInstance, + Region: "us-east-1", + ResourceType: "db.r7g.large", + Count: 5, + Details: &common.DatabaseDetails{Engine: "aurora-mysql", AZConfig: "single-az"}, + }, + } + cov := PoolCoverageMap{ + rdsPoolKey("us-east-1", "db.r7g.large", "Aurora MySQL", "Single-AZ"): { + Pct: 0.0, AvgInstancesPerHour: 10, + }, + } + sized, nonRDS := ApplyFamilyNUSizingRDS(recs, cov, 80) + require.Len(t, sized, 1, "only the RDS rec went through family-NU") + require.Len(t, nonRDS, 2, "EC2 + SP recs left for per-pool sizing") + assert.Equal(t, common.ServiceEC2, nonRDS[0].Service) + assert.Equal(t, common.ServiceSavingsPlans, nonRDS[1].Service) + }) + + t.Run("ProjectedCoverage is cumulative across recs in a family", func(t *testing.T) { + // Two per-account recs in same family/engine/deployment: each rec + // must show the family-wide projected coverage (existing + sum-of- + // all-recs' new NU / family total NU), not just its own slice. + // Without cumulation, two recs each adding 20% of family NU would + // each report "existing + 20%" instead of the true "existing + 40%". + cov := PoolCoverageMap{ + rdsPoolKey("us-east-1", "db.t4g.medium", "Aurora PostgreSQL", "Single-AZ"): { + Pct: 16.7, AvgInstancesPerHour: 12.0, // 24 NU at .medium (×2 NU) + }, + } + // Family total NU = 24. AWS rec api returned two recs (per account) + // each .medium size; AWS-implied total = 5+3 = 8 RIs = 16 NU. + // existing=16.7%, gap=63.3, targetNU = 63.3/100 × 24 = 15.2. + // scale = 15.2/16 = 0.95 → floor(5×0.95)=4, floor(3×0.95)=2. + // Total new NU = (4+2) × 2 = 12. Cumulative projection = + // 16.7 + 12/24*100 = 16.7 + 50 = 66.7%. + recs := []common.Recommendation{ + { + Service: common.ServiceRDS, + CommitmentType: common.CommitmentReservedInstance, + Region: "us-east-1", + ResourceType: "db.t4g.medium", + Account: "production", + Count: 5, + CommitmentCost: 500, + Details: &common.DatabaseDetails{Engine: "aurora-postgresql", AZConfig: "single-az"}, + }, + { + Service: common.ServiceRDS, + CommitmentType: common.CommitmentReservedInstance, + Region: "us-east-1", + ResourceType: "db.t4g.medium", + Account: "staging", + Count: 3, + CommitmentCost: 300, + Details: &common.DatabaseDetails{Engine: "aurora-postgresql", AZConfig: "single-az"}, + }, + } + sized, _ := ApplyFamilyNUSizingRDS(recs, cov, 80) + require.Len(t, sized, 2, "both recs kept after scaling") + // Both recs see the SAME cumulative projection. + assert.InDelta(t, sized[0].ProjectedCoverage, sized[1].ProjectedCoverage, 0.001, + "both recs in the same family must report the same ProjectedCoverage") + // And the projection reflects BOTH recs' contributions, not just + // either one alone. existing 16.7 + (4+2)*2 / 24 * 100 = 66.7 + assert.InDelta(t, 66.7, sized[0].ProjectedCoverage, 0.01, + "cumulative projection: existing + sum-of-all-recs new NU / family total NU") + // Both rec.Count values reflect the scale-down. + assert.Equal(t, 4, sized[0].Count, "prod scaled 5→4") + assert.Equal(t, 2, sized[1].Count, "staging scaled 3→2") + }) + + t.Run("multiple sizes in same family scale together", func(t *testing.T) { + // Family db.r6g Aurora MySQL eu-west-2 has demand at .large and .xlarge. + // avg .large = 5 (20 NU), avg .xlarge = 5 (40 NU). Total = 60 NU; cov = 0. + // target_NU @ 80% = 48. + // AWS rec: 10 × .large = 40 NU. scale = 48/40 = 1.2 → floor(12.0) = 12 .large. + // (No AWS rec at .xlarge — the bundled .large covers via size flex.) + cov := PoolCoverageMap{ + rdsPoolKey("eu-west-2", "db.r6g.large", "Aurora MySQL", "Single-AZ"): { + Pct: 0.0, AvgInstancesPerHour: 5, + }, + rdsPoolKey("eu-west-2", "db.r6g.xlarge", "Aurora MySQL", "Single-AZ"): { + Pct: 0.0, AvgInstancesPerHour: 5, + }, + } + recs := []common.Recommendation{ + { + Service: common.ServiceRDS, + CommitmentType: common.CommitmentReservedInstance, + Region: "eu-west-2", + ResourceType: "db.r6g.large", + Count: 10, + Details: &common.DatabaseDetails{Engine: "aurora-mysql", AZConfig: "single-az"}, + }, + } + sized, _ := ApplyFamilyNUSizingRDS(recs, cov, 80) + require.Len(t, sized, 1) + assert.Equal(t, 12, sized[0].Count, "family-NU need includes .xlarge demand even though AWS rec is at .large") + }) +} + +// TestIsRDSRIRec covers the dispatch predicate that gates family-NU +// sizing — must reject SP/non-RDS commitments so they reach per-pool +// sizing untouched. +func TestIsRDSRIRec(t *testing.T) { + cases := []struct { + name string + rec common.Recommendation + want bool + }{ + {"RDS RI", common.Recommendation{Service: common.ServiceRDS, CommitmentType: common.CommitmentReservedInstance}, true}, + {"RelationalDB alias", common.Recommendation{Service: common.ServiceRelationalDB, CommitmentType: common.CommitmentReservedInstance}, true}, + {"EC2 RI", common.Recommendation{Service: common.ServiceEC2, CommitmentType: common.CommitmentReservedInstance}, false}, + {"RDS SP (impossible but defensive)", common.Recommendation{Service: common.ServiceRDS, CommitmentType: common.CommitmentSavingsPlan}, false}, + {"SP", common.Recommendation{Service: common.ServiceSavingsPlans, CommitmentType: common.CommitmentSavingsPlan}, false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, isRDSRIRec(tc.rec)) + }) + } +} diff --git a/providers/aws/recommendations/parser_ri.go b/providers/aws/recommendations/parser_ri.go index fe0ffbf7..0697cbf9 100644 --- a/providers/aws/recommendations/parser_ri.go +++ b/providers/aws/recommendations/parser_ri.go @@ -45,12 +45,16 @@ func (c *Client) parseRecommendationDetail(details *types.ReservationPurchaseRec Timestamp: time.Now(), } - // Parse recommended quantity + // Parse recommended quantity. RecommendedCount preserves AWS's pre-sizing + // count so the CSV can show what AWS proposed alongside what --coverage / + // --target-coverage chose; Count is the working value the sizing step + // mutates. count, err := c.parseRecommendedQuantity(details) if err != nil { return nil, fmt.Errorf("failed to parse recommended quantity: %w", err) } rec.Count = count + rec.RecommendedCount = count // Parse cost information rec.EstimatedSavings, rec.SavingsPercentage, err = c.parseCostInformation(details) @@ -66,6 +70,9 @@ func (c *Client) parseRecommendationDetail(details *types.ReservationPurchaseRec // Parse AWS-provided cost details c.parseAWSCostDetails(rec, details) + // Parse RI utilization signals used by --target-coverage sizing + c.parseRIUtilizationSignals(rec, details) + // Parse service-specific details if err := c.parseServiceSpecificDetails(rec, details, params.Service); err != nil { return nil, err @@ -74,6 +81,27 @@ func (c *Client) parseRecommendationDetail(details *types.ReservationPurchaseRec return rec, nil } +// parseRIUtilizationSignals populates AverageInstancesUsedPerHour and +// RecommendedUtilization from the CE response. Both fields are *string in the +// SDK; nil or unparseable values leave the destination at zero, which the +// --target-coverage sizing path treats as "no signal" and skips. +func (c *Client) parseRIUtilizationSignals(rec *common.Recommendation, details *types.ReservationPurchaseRecommendationDetail) { + if details.AverageNumberOfInstancesUsedPerHour != nil { + if v, err := strconv.ParseFloat(*details.AverageNumberOfInstancesUsedPerHour, 64); err == nil { + rec.AverageInstancesUsedPerHour = v + } else { + log.Printf("WARNING: failed to parse AverageNumberOfInstancesUsedPerHour %q for RI recommendation (service=%s, account=%s): %v", *details.AverageNumberOfInstancesUsedPerHour, rec.Service, rec.Account, err) + } + } + if details.AverageUtilization != nil { + if v, err := strconv.ParseFloat(*details.AverageUtilization, 64); err == nil { + rec.RecommendedUtilization = v + } else { + log.Printf("WARNING: failed to parse AverageUtilization %q for RI recommendation (service=%s, account=%s): %v", *details.AverageUtilization, rec.Service, rec.Account, err) + } + } +} + // parseRecommendedQuantity extracts the recommended quantity from details func (c *Client) parseRecommendedQuantity(details *types.ReservationPurchaseRecommendationDetail) (int, error) { if details.RecommendedNumberOfInstancesToPurchase == nil { diff --git a/providers/aws/recommendations/parser_ri_test.go b/providers/aws/recommendations/parser_ri_test.go index 47e78286..05fce312 100644 --- a/providers/aws/recommendations/parser_ri_test.go +++ b/providers/aws/recommendations/parser_ri_test.go @@ -395,3 +395,71 @@ func TestParseRecommendations_EmptyInput(t *testing.T) { require.NoError(t, err) assert.Empty(t, recs) } + +// TestParseRIUtilizationSignals covers the AverageNumberOfInstancesUsedPerHour +// and AverageUtilization fields added for issue #338 (--target-coverage). +// Verifies both successful parses, nil-pointer fallback to zero, and +// parse-failure fallback to zero — the sizing path in cmd/helpers.go treats +// zero as "no signal" so the fallback behaviour matters. +func TestParseRIUtilizationSignals(t *testing.T) { + client := &Client{} + + tests := []struct { + name string + details *types.ReservationPurchaseRecommendationDetail + wantAvgInstances float64 + wantUtilization float64 + }{ + { + name: "both fields parsed", + details: &types.ReservationPurchaseRecommendationDetail{ + AverageNumberOfInstancesUsedPerHour: aws.String("8.5"), + AverageUtilization: aws.String("85.3"), + }, + wantAvgInstances: 8.5, + wantUtilization: 85.3, + }, + { + name: "both fields nil → both zero", + details: &types.ReservationPurchaseRecommendationDetail{}, + wantAvgInstances: 0, + wantUtilization: 0, + }, + { + name: "unparseable AverageNumberOfInstancesUsedPerHour → that field zero, other still parses", + details: &types.ReservationPurchaseRecommendationDetail{ + AverageNumberOfInstancesUsedPerHour: aws.String("not-a-number"), + AverageUtilization: aws.String("90.0"), + }, + wantAvgInstances: 0, + wantUtilization: 90.0, + }, + { + name: "unparseable AverageUtilization → that field zero, other still parses", + details: &types.ReservationPurchaseRecommendationDetail{ + AverageNumberOfInstancesUsedPerHour: aws.String("5.0"), + AverageUtilization: aws.String("garbage"), + }, + wantAvgInstances: 5.0, + wantUtilization: 0, + }, + { + name: "zero values parse as zero (not treated as missing at parse time)", + details: &types.ReservationPurchaseRecommendationDetail{ + AverageNumberOfInstancesUsedPerHour: aws.String("0"), + AverageUtilization: aws.String("0.0"), + }, + wantAvgInstances: 0, + wantUtilization: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rec := &common.Recommendation{} + client.parseRIUtilizationSignals(rec, tt.details) + assert.Equal(t, tt.wantAvgInstances, rec.AverageInstancesUsedPerHour) + assert.Equal(t, tt.wantUtilization, rec.RecommendedUtilization) + }) + } +} diff --git a/providers/aws/recommendations/parser_sp.go b/providers/aws/recommendations/parser_sp.go index 283e969b..800c5a41 100644 --- a/providers/aws/recommendations/parser_sp.go +++ b/providers/aws/recommendations/parser_sp.go @@ -136,6 +136,11 @@ func (c *Client) parseSavingsPlanDetail( monthlySavings := parseOptionalFloat("EstimatedMonthlySavingsAmount", detail.EstimatedMonthlySavingsAmount) savingsPercent := parseOptionalFloat("EstimatedSavingsPercentage", detail.EstimatedSavingsPercentage) upfrontCost := parseOptionalFloat("UpfrontCost", detail.UpfrontCost) + // EstimatedAverageUtilization carries the "if you buy exactly this commitment, + // what % of it will AWS expect to be used" signal. Used by --target-coverage + // sizing in cmd/helpers.go; zero (nil pointer or parse failure) means "no signal" + // and the sizing path leaves the recommendation unchanged. + recommendedUtilization := parseOptionalFloat("EstimatedAverageUtilization", detail.EstimatedAverageUtilization) // onDemandCost is the canonical monthly on-demand baseline for this SP // recommendation. AWS Cost Explorer returns the average hourly on-demand // spend over the lookback period in CurrentAverageHourlyOnDemandSpend; @@ -196,19 +201,20 @@ func (c *Client) parseSavingsPlanDetail( } return &common.Recommendation{ - Provider: common.ProviderAWS, - Service: serviceSlugForPlanType(planType), - PaymentOption: params.PaymentOption, - Term: params.Term, - CommitmentType: common.CommitmentSavingsPlan, - Count: 1, - EstimatedSavings: monthlySavings, - SavingsPercentage: savingsPercent, - CommitmentCost: upfrontCost, - OnDemandCost: onDemandCost, - RecurringMonthlyCost: recurringMonthlyCost, - Timestamp: time.Now(), - Account: accountID, + Provider: common.ProviderAWS, + Service: serviceSlugForPlanType(planType), + PaymentOption: params.PaymentOption, + Term: params.Term, + CommitmentType: common.CommitmentSavingsPlan, + Count: 1, + EstimatedSavings: monthlySavings, + SavingsPercentage: savingsPercent, + CommitmentCost: upfrontCost, + OnDemandCost: onDemandCost, + RecurringMonthlyCost: recurringMonthlyCost, + RecommendedUtilization: recommendedUtilization, + Timestamp: time.Now(), + Account: accountID, Details: &common.SavingsPlanDetails{ PlanType: planTypeStr, HourlyCommitment: hourlyCommitment, diff --git a/providers/aws/recommendations/parser_sp_additional_test.go b/providers/aws/recommendations/parser_sp_additional_test.go index f780f1c2..68e6331d 100644 --- a/providers/aws/recommendations/parser_sp_additional_test.go +++ b/providers/aws/recommendations/parser_sp_additional_test.go @@ -240,6 +240,10 @@ func (m *mockCostExplorerForSP) GetReservationUtilization(ctx context.Context, p return &costexplorer.GetReservationUtilizationOutput{}, nil } +func (m *mockCostExplorerForSP) GetReservationCoverage(ctx context.Context, params *costexplorer.GetReservationCoverageInput, optFns ...func(*costexplorer.Options)) (*costexplorer.GetReservationCoverageOutput, error) { + return &costexplorer.GetReservationCoverageOutput{}, nil +} + func TestGetSavingsPlansRecommendations_WithFilters(t *testing.T) { mockAPI := &mockCostExplorerForSP{ responses: map[types.SupportedSavingsPlansType]*costexplorer.GetSavingsPlansPurchaseRecommendationOutput{ diff --git a/providers/aws/recommendations/parser_sp_test.go b/providers/aws/recommendations/parser_sp_test.go index 99115534..f896d427 100644 --- a/providers/aws/recommendations/parser_sp_test.go +++ b/providers/aws/recommendations/parser_sp_test.go @@ -3,8 +3,12 @@ package recommendations import ( "testing" + "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/costexplorer/types" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/LeanerCloud/CUDly/pkg/common" ) func TestGetFilteredPlanTypes(t *testing.T) { @@ -146,3 +150,56 @@ func TestGetFilteredPlanTypes(t *testing.T) { }) } } + +// TestParseSavingsPlanDetail_RecommendedUtilization covers the +// EstimatedAverageUtilization field added for issue #338 — the SP equivalent +// of the RI AverageUtilization signal that drives --target-coverage sizing. +func TestParseSavingsPlanDetail_RecommendedUtilization(t *testing.T) { + client := &Client{} + params := common.RecommendationParams{ + Service: common.ServiceSavingsPlansCompute, + PaymentOption: "no-upfront", + Term: "1yr", + } + + tests := []struct { + name string + utilizationStr *string + wantUtilization float64 + wantAvgInstancesIs float64 + }{ + { + name: "field present and parseable", + utilizationStr: aws.String("87.5"), + wantUtilization: 87.5, + wantAvgInstancesIs: 0, + }, + { + name: "field nil → zero", + utilizationStr: nil, + wantUtilization: 0, + wantAvgInstancesIs: 0, + }, + { + name: "field unparseable → zero (parseOptionalFloat logs warn)", + utilizationStr: aws.String("not-a-number"), + wantUtilization: 0, + wantAvgInstancesIs: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + detail := &types.SavingsPlansPurchaseRecommendationDetail{ + HourlyCommitmentToPurchase: aws.String("1.0"), + EstimatedAverageUtilization: tt.utilizationStr, + } + rec := client.parseSavingsPlanDetail(detail, params, types.SupportedSavingsPlansTypeComputeSp) + require.NotNil(t, rec) + assert.Equal(t, tt.wantUtilization, rec.RecommendedUtilization, + "SP utilization should be parsed into rec.RecommendedUtilization") + assert.Equal(t, tt.wantAvgInstancesIs, rec.AverageInstancesUsedPerHour, + "SP recs leave AverageInstancesUsedPerHour at zero (not applicable for SPs)") + }) + } +} diff --git a/providers/aws/service_client.go b/providers/aws/service_client.go index 35e18b30..a9d5acff 100644 --- a/providers/aws/service_client.go +++ b/providers/aws/service_client.go @@ -164,6 +164,16 @@ func (r *RecommendationsClientAdapter) GetRIUtilization(ctx context.Context, loo return r.client.GetRIUtilization(ctx, lookbackDays) } +// GetRICoverageMap returns the per-pool RI coverage % over the last +// lookbackDays days, keyed by "region:instance_type:account" (or +// "region:instance_type:engine:account" for RDS) so the apply helper +// can look up per-linked-account coverage. Caller passes the regions +// to scan; CE returns coverage filtered to that region and grouped by +// LINKED_ACCOUNT + INSTANCE_TYPE. +func (r *RecommendationsClientAdapter) GetRICoverageMap(ctx context.Context, lookbackDays int, regions []string) (recommendations.PoolCoverageMap, error) { + return r.client.GetRICoverageMap(ctx, lookbackDays, regions) +} + // NewRecommendationsClientDirect creates a new recommendations client returning the concrete type // (needed for GetRIUtilization which is not part of the generic provider interface). func NewRecommendationsClientDirect(cfg aws.Config) *RecommendationsClientAdapter { diff --git a/providers/aws/service_client_test.go b/providers/aws/service_client_test.go index 374271df..61b5c5b7 100644 --- a/providers/aws/service_client_test.go +++ b/providers/aws/service_client_test.go @@ -32,6 +32,10 @@ func (m *mockCostExplorerClient) GetReservationUtilization(ctx context.Context, return &costexplorer.GetReservationUtilizationOutput{}, nil } +func (m *mockCostExplorerClient) GetReservationCoverage(ctx context.Context, params *costexplorer.GetReservationCoverageInput, optFns ...func(*costexplorer.Options)) (*costexplorer.GetReservationCoverageOutput, error) { + return &costexplorer.GetReservationCoverageOutput{}, nil +} + // newTestRecommendationsClient creates a recommendations client with a mock CE client func newTestRecommendationsClient(ce *mockCostExplorerClient) *recommendations.Client { return recommendations.NewClientWithAPI(ce, "us-east-1") diff --git a/providers/aws/services/rds/client.go b/providers/aws/services/rds/client.go index aa66f56f..873b0208 100644 --- a/providers/aws/services/rds/client.go +++ b/providers/aws/services/rds/client.go @@ -87,6 +87,15 @@ func (c *Client) GetExistingCommitments(ctx context.Context) ([]common.Commitmen termMonths = 36 } + // Deployment carries the same vocabulary as DatabaseDetails.AZConfig + // on Recommendation so pool-key matching aligns deployment-wise + // between recs and existing commitments (used by expiry adjustment). + // AWS RDS SDK exposes MultiAZ as a *bool on ReservedDBInstance; + // nil or false → "single-az", true → "multi-az". + deployment := "single-az" + if aws.ToBool(instance.MultiAZ) { + deployment = "multi-az" + } commitment := common.Commitment{ Provider: common.ProviderAWS, CommitmentID: aws.ToString(instance.ReservedDBInstanceId), @@ -95,6 +104,7 @@ func (c *Client) GetExistingCommitments(ctx context.Context) ([]common.Commitmen Region: c.region, ResourceType: aws.ToString(instance.DBInstanceClass), Engine: aws.ToString(instance.ProductDescription), // Capture engine for accurate duplicate checking + Deployment: deployment, Count: int(aws.ToInt32(instance.DBInstanceCount)), State: state, StartDate: aws.ToTime(instance.StartTime),