From f315a0393c7ae1f3e100d02a100a2d0b962147c2 Mon Sep 17 00:00:00 2001 From: Cristian Magherusan-Stanciu Date: Fri, 24 Apr 2026 23:07:19 +0200 Subject: [PATCH 1/2] feat(commitment-options): probe AWS reserved-offerings live and validate saves MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the hardcoded per-service (term × payment) rules in frontend/src/commitmentOptions.ts with data harvested from the AWS reserved-offerings APIs. The probe runs at most once per server lifetime (persisted in new tables commitment_options_probe_runs + commitment_options_combos); if the DB already has data, subsequent Get()s short-circuit on the cache. Any failure along the chain — no AWS account connected, probe denied, unknown payment shape — collapses to ErrNoData and the frontend falls back to its hardcoded rules, so this is wired unconditionally. Backend: - internal/commitmentopts/: new package with per-service Probers (rds, elasticache, opensearch, redshift, memorydb, ec2), shared duration+payment normalizers (tokenizes inputs to handle enum stringers and string OfferingTypes, rejects pre-2014 legacy utilization values), Postgres-backed Store with ON CONFLICT DO NOTHING for cross-process races, and an errgroup-driven Service that enforces all-or-nothing persistence — one probe failure aborts the whole run so callers never see half-populated cache. - GET /api/commitment-options returns {status:"ok", aws:{...}} with term/payment tuples, or {status:"unavailable"} (HTTP 200) when probe data is missing so the frontend's error-toast path stays quiet. - updateServiceConfig (PUT /api/config/service/{id}) now calls Validate() via a new checkCommitmentOptionCombo helper before persisting. Rejects invalid (aws, service, term, payment) combos with 400; permissive-true when probe data is absent so the frontend's hardcoded rules remain the gate. - Handler wiring threads credStore + awsCfg through a BuildConfigFn that calls credentials.ResolveAWSCredentialProvider for the first enabled AWS account and builds a regional aws.Config for probes. Frontend: - fetchAndPopulateCommitmentOptions() fires from loadGlobalSettings BEFORE syncAllServiceCommitmentConstraints(), overlays server data onto commitmentConfigs.aws (diffing supported combos against the full terms × payments product), and silently no-ops on network/parse/unavailable. DB migration 000039 adds the two tables. Safe to drop and re-add (ON CONFLICT-tolerant; no external references). --- .../src/__tests__/commitmentOptions.test.ts | 86 ++++ frontend/src/commitmentOptions.ts | 63 +++ frontend/src/settings.ts | 6 + go.mod | 8 +- internal/api/handler.go | 7 + internal/api/handler_commitment_options.go | 62 +++ .../api/handler_commitment_options_test.go | 110 +++++ internal/api/handler_config.go | 29 ++ internal/api/handler_config_test.go | 80 ++++ internal/api/router.go | 9 + internal/api/types.go | 13 + internal/commitmentopts/normalize.go | 78 ++++ internal/commitmentopts/normalize_test.go | 94 ++++ internal/commitmentopts/probe.go | 426 ++++++++++++++++++ internal/commitmentopts/probe_test.go | 346 ++++++++++++++ internal/commitmentopts/service.go | 189 ++++++++ internal/commitmentopts/service_test.go | 286 ++++++++++++ internal/commitmentopts/store_postgres.go | 125 +++++ .../commitmentopts/store_postgres_test.go | 134 ++++++ internal/commitmentopts/types.go | 86 ++++ .../000039_commitment_options_cache.down.sql | 2 + .../000039_commitment_options_cache.up.sql | 23 + internal/server/app.go | 26 ++ 23 files changed, 2284 insertions(+), 4 deletions(-) create mode 100644 internal/api/handler_commitment_options.go create mode 100644 internal/api/handler_commitment_options_test.go create mode 100644 internal/commitmentopts/normalize.go create mode 100644 internal/commitmentopts/normalize_test.go create mode 100644 internal/commitmentopts/probe.go create mode 100644 internal/commitmentopts/probe_test.go create mode 100644 internal/commitmentopts/service.go create mode 100644 internal/commitmentopts/service_test.go create mode 100644 internal/commitmentopts/store_postgres.go create mode 100644 internal/commitmentopts/store_postgres_test.go create mode 100644 internal/commitmentopts/types.go create mode 100644 internal/database/postgres/migrations/000039_commitment_options_cache.down.sql create mode 100644 internal/database/postgres/migrations/000039_commitment_options_cache.up.sql diff --git a/frontend/src/__tests__/commitmentOptions.test.ts b/frontend/src/__tests__/commitmentOptions.test.ts index 0c97ad1b..e5be4b35 100644 --- a/frontend/src/__tests__/commitmentOptions.test.ts +++ b/frontend/src/__tests__/commitmentOptions.test.ts @@ -2,6 +2,7 @@ * Tests for commitmentOptions module */ import { + fetchAndPopulateCommitmentOptions, getCommitmentConfig, isValidCombination, getValidPaymentOptions, @@ -707,4 +708,89 @@ describe('commitmentOptions', () => { expect(configWithInvalid.invalidCombinations).toHaveLength(1); }); }); + + describe('fetchAndPopulateCommitmentOptions', () => { + // These tests mutate the shared commitmentConfigs module state via the + // function under test. Restore a known-good overlay at the end of each + // test by re-populating with the hardcoded fallback shape (no supported + // combos → full invalidCombinations → close enough to the starting + // state for subsequent tests to see consistent results). + const mockOk = (aws: Record>) => + jest.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ status: 'ok', aws }) + } as Response); + + afterEach(() => { + // Reset RDS back to hardcoded — other tests in this file assert RDS + // retains `{term:3, payment:'no-upfront'}` as invalid. + return fetchAndPopulateCommitmentOptions( + mockOk({ rds: [ + { term: 1, payment: 'all-upfront' }, + { term: 1, payment: 'partial-upfront' }, + { term: 1, payment: 'no-upfront' }, + { term: 3, payment: 'all-upfront' }, + { term: 3, payment: 'partial-upfront' }, + ]}) + ); + }); + + it('overlays server-supplied combos onto AWS services', async () => { + // Server says RDS supports ONLY 1yr all-upfront — everything else is + // invalid. Overlay should reflect that exactly. + const fetchMock = mockOk({ + rds: [{ term: 1, payment: 'all-upfront' }], + }); + + await fetchAndPopulateCommitmentOptions(fetchMock); + + expect(fetchMock).toHaveBeenCalledWith('/api/commitment-options'); + expect(isValidCombination('aws', 'rds', 1, 'all-upfront')).toBe(true); + expect(isValidCombination('aws', 'rds', 1, 'partial-upfront')).toBe(false); + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(false); + }); + + it('leaves hardcoded rules intact on status:unavailable', async () => { + const fetchMock = jest.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ status: 'unavailable' }) + } as Response); + + await fetchAndPopulateCommitmentOptions(fetchMock); + + // RDS hardcoded rule: 3yr no-upfront invalid, others valid. + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(false); + expect(isValidCombination('aws', 'rds', 1, 'all-upfront')).toBe(true); + }); + + it('leaves hardcoded rules intact on HTTP error', async () => { + const fetchMock = jest.fn().mockResolvedValue({ ok: false } as Response); + + await fetchAndPopulateCommitmentOptions(fetchMock); + + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(false); + expect(isValidCombination('aws', 'rds', 1, 'all-upfront')).toBe(true); + }); + + it('leaves hardcoded rules intact on network error', async () => { + const fetchMock = jest.fn().mockRejectedValue(new Error('network down')); + + await fetchAndPopulateCommitmentOptions(fetchMock); + + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(false); + }); + + it('clears invalidCombinations when server reports full support', async () => { + const allCombos = [1, 3].flatMap(term => + ['all-upfront', 'partial-upfront', 'no-upfront'].map(payment => ({ term, payment })) + ); + const fetchMock = mockOk({ rds: allCombos }); + + await fetchAndPopulateCommitmentOptions(fetchMock); + + // Every combo now valid — previously 3yr no-upfront was blocked. + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(true); + expect(isValidCombination('aws', 'rds', 1, 'partial-upfront')).toBe(true); + }); + }); }); diff --git a/frontend/src/commitmentOptions.ts b/frontend/src/commitmentOptions.ts index d78c500d..26e20c98 100644 --- a/frontend/src/commitmentOptions.ts +++ b/frontend/src/commitmentOptions.ts @@ -254,6 +254,69 @@ export function getPaymentLabel(value: string): string { return payment?.label ?? value; } +interface ServerCommitmentOptions { + status: 'ok' | 'unavailable'; + aws?: Record>; +} + +// FetchLike is a subset of fetch's signature that tests can stub without +// touching the global. Defaults to `fetch` when the caller passes nothing. +type FetchLike = (input: string, init?: RequestInit) => Promise; + +/** + * Fetch the dynamically-probed AWS commitment options from the backend and + * overlay them onto `commitmentConfigs.aws`. Server-side data always wins — + * we trust live AWS offerings over our hardcoded list. On any failure + * (network error, non-200, {status:"unavailable"}) the hardcoded rules stay + * intact so the UI still gates correctly. + * + * Idempotent: calling twice with the same data is a no-op. The Settings + * page awaits this before syncing per-service constraints so the first + * render reflects the server's rules, not the fallback's. + */ +export async function fetchAndPopulateCommitmentOptions(fetchFn?: FetchLike): Promise { + const f: FetchLike = fetchFn ?? ((input, init) => fetch(input, init)); + + let body: ServerCommitmentOptions; + try { + const resp = await f('/api/commitment-options'); + if (!resp.ok) return; + body = (await resp.json()) as ServerCommitmentOptions; + } catch { + // Network/parse errors: silently fall back to hardcoded rules. The + // frontend still works; we just miss the dynamic overlay. + return; + } + + if (body.status !== 'ok' || !body.aws) return; + + const awsConfigs = commitmentConfigs.aws ?? (commitmentConfigs.aws = {}); + for (const [service, supportedCombos] of Object.entries(body.aws)) { + const existing = awsConfigs[service]; + const base: CommitmentConfig = existing ?? { terms: STANDARD_TERMS, payments: AWS_PAYMENTS }; + + // Derive invalidCombinations as the set difference of (terms × payments) + // minus the supported tuples the server returned. + const supportedKeys = new Set( + supportedCombos.map(c => `${c.term}:${c.payment}`) + ); + const invalid: Array<{ term: number; payment: string }> = []; + for (const term of base.terms) { + for (const payment of base.payments) { + if (!supportedKeys.has(`${term.value}:${payment.value}`)) { + invalid.push({ term: term.value, payment: payment.value }); + } + } + } + + awsConfigs[service] = { + terms: base.terms, + payments: base.payments, + invalidCombinations: invalid.length > 0 ? invalid : undefined, + }; + } +} + /** * Map legacy AWS payment values to display labels */ diff --git a/frontend/src/settings.ts b/frontend/src/settings.ts index c6273703..efcf7fd4 100644 --- a/frontend/src/settings.ts +++ b/frontend/src/settings.ts @@ -3,6 +3,7 @@ */ import * as api from './api'; +import { fetchAndPopulateCommitmentOptions } from './commitmentOptions'; import { initFederationPanel } from './federation'; import { confirmDialog } from './confirmDialog'; import { reflectDirtyState } from './settings-subnav'; @@ -1204,6 +1205,11 @@ export async function loadGlobalSettings(): Promise { if (formEl) formEl.classList.add('hidden'); if (errorEl) errorEl.classList.add('hidden'); + // Overlay dynamically-probed AWS commitment rules before we render the + // form, so the first paint already respects server data. Failures fall + // back to hardcoded rules silently — we never block Settings on this. + await fetchAndPopulateCommitmentOptions(); + try { const data = await api.getConfig(); diff --git a/go.mod b/go.mod index e246261d..8fd3bfa3 100644 --- a/go.mod +++ b/go.mod @@ -9,11 +9,11 @@ require ( github.com/aws/aws-sdk-go-v2/config v1.26.2 github.com/aws/aws-sdk-go-v2/service/costexplorer v1.61.0 // indirect github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.2 - github.com/aws/aws-sdk-go-v2/service/elasticache v1.50.3 // indirect - github.com/aws/aws-sdk-go-v2/service/memorydb v1.31.4 // indirect - github.com/aws/aws-sdk-go-v2/service/opensearch v1.52.3 // indirect + github.com/aws/aws-sdk-go-v2/service/elasticache v1.50.3 + github.com/aws/aws-sdk-go-v2/service/memorydb v1.31.4 + github.com/aws/aws-sdk-go-v2/service/opensearch v1.52.3 github.com/aws/aws-sdk-go-v2/service/rds v1.97.3 - github.com/aws/aws-sdk-go-v2/service/redshift v1.58.3 // indirect + github.com/aws/aws-sdk-go-v2/service/redshift v1.58.3 github.com/spf13/cobra v1.8.0 github.com/stretchr/testify v1.11.1 ) diff --git a/internal/api/handler.go b/internal/api/handler.go index ee615d67..61f0c7d6 100644 --- a/internal/api/handler.go +++ b/internal/api/handler.go @@ -69,6 +69,12 @@ type Handler struct { // fields stay nil. reshapeEC2Factory func(aws.Config) reshapeEC2Client reshapeRecsFactory func(aws.Config) reshapeRecsClient + + // commitmentOpts discovers which AWS (term, payment) combinations + // each service actually sells and validates saves against that data. + // Nil is valid: the endpoint returns unavailable and save-side + // validation no-ops, deferring to the frontend's hardcoded rules. + commitmentOpts CommitmentOptsInterface } // getRIUtilizationCache returns the Postgres-backed TTL cache for Cost @@ -111,6 +117,7 @@ func NewHandler(cfg HandlerConfig) *Handler { analyticsCollector: cfg.AnalyticsCollector, signer: cfg.OIDCSigner, issuerURL: cfg.OIDCIssuerURL, + commitmentOpts: cfg.CommitmentOpts, } // Pre-load API key (with a 5s timeout to avoid stalling cold-start indefinitely) diff --git a/internal/api/handler_commitment_options.go b/internal/api/handler_commitment_options.go new file mode 100644 index 00000000..59adf683 --- /dev/null +++ b/internal/api/handler_commitment_options.go @@ -0,0 +1,62 @@ +// Package api provides the HTTP API handlers for the CUDly dashboard. +package api + +import ( + "context" + "errors" + + "github.com/LeanerCloud/CUDly/internal/commitmentopts" +) + +// commitmentOptionsResponse is the JSON shape returned by +// GET /api/commitment-options. On success, AWS carries the probed combos +// keyed by service (rds, elasticache, ...). Azure and GCP are deliberately +// omitted — those commitment rules stay hardcoded in the frontend because +// their APIs don't expose a comparable probe. +type commitmentOptionsResponse struct { + Status string `json:"status"` + AWS map[string][]commitmentOptionCombo `json:"aws,omitempty"` +} + +// commitmentOptionCombo is one (term, payment) tuple as the frontend +// consumes it. Dropping Provider/Service from the persisted Combo shape +// keeps the wire payload compact. +type commitmentOptionCombo struct { + Term int `json:"term"` + Payment string `json:"payment"` +} + +// getCommitmentOptions returns the dynamically-probed AWS commitment +// options. On any error or when the probe hasn't run yet, it returns +// {"status":"unavailable"} (200, not a 5xx) so the frontend can fall +// back to its hardcoded defaults without tripping the generic +// error-toast path. +func (h *Handler) getCommitmentOptions(ctx context.Context) (*commitmentOptionsResponse, error) { + if h.commitmentOpts == nil { + return &commitmentOptionsResponse{Status: "unavailable"}, nil + } + opts, err := h.commitmentOpts.Get(ctx) + if err != nil { + if errors.Is(err, commitmentopts.ErrNoData) { + return &commitmentOptionsResponse{Status: "unavailable"}, nil + } + return nil, err + } + + awsOpts := opts["aws"] + out := make(map[string][]commitmentOptionCombo, len(awsOpts)) + for svc, combos := range awsOpts { + out[svc] = make([]commitmentOptionCombo, 0, len(combos)) + for _, c := range combos { + out[svc] = append(out[svc], commitmentOptionCombo{Term: c.TermYears, Payment: c.Payment}) + } + } + if len(out) == 0 { + // Probe ran but returned nothing for AWS (e.g. every probe got + // filtered out by normalization). Treat as unavailable so the + // frontend falls through to its hardcoded rules rather than + // rendering an empty constraint set. + return &commitmentOptionsResponse{Status: "unavailable"}, nil + } + return &commitmentOptionsResponse{Status: "ok", AWS: out}, nil +} diff --git a/internal/api/handler_commitment_options_test.go b/internal/api/handler_commitment_options_test.go new file mode 100644 index 00000000..6e0051b8 --- /dev/null +++ b/internal/api/handler_commitment_options_test.go @@ -0,0 +1,110 @@ +package api + +import ( + "context" + "errors" + "testing" + + "github.com/LeanerCloud/CUDly/internal/commitmentopts" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// stubCommitmentOpts lets handler tests drive the endpoint without the full +// probe+store machinery. Mirrors the CommitmentOptsInterface surface. +type stubCommitmentOpts struct { + getFn func(ctx context.Context) (commitmentopts.Options, error) + validateFn func(ctx context.Context, provider, service string, term int, payment string) (bool, error) +} + +func (s *stubCommitmentOpts) Get(ctx context.Context) (commitmentopts.Options, error) { + return s.getFn(ctx) +} + +func (s *stubCommitmentOpts) Validate(ctx context.Context, provider, service string, term int, payment string) (bool, error) { + return s.validateFn(ctx, provider, service, term, payment) +} + +func TestGetCommitmentOptions_NilService_ReturnsUnavailable(t *testing.T) { + h := &Handler{commitmentOpts: nil} + + resp, err := h.getCommitmentOptions(context.Background()) + + require.NoError(t, err) + assert.Equal(t, "unavailable", resp.Status) + assert.Nil(t, resp.AWS) +} + +func TestGetCommitmentOptions_ErrNoData_ReturnsUnavailable(t *testing.T) { + h := &Handler{commitmentOpts: &stubCommitmentOpts{ + getFn: func(context.Context) (commitmentopts.Options, error) { + return nil, commitmentopts.ErrNoData + }, + }} + + resp, err := h.getCommitmentOptions(context.Background()) + + require.NoError(t, err) + assert.Equal(t, "unavailable", resp.Status) + assert.Nil(t, resp.AWS) +} + +func TestGetCommitmentOptions_UnexpectedError_Propagates(t *testing.T) { + boom := errors.New("database exploded") + h := &Handler{commitmentOpts: &stubCommitmentOpts{ + getFn: func(context.Context) (commitmentopts.Options, error) { + return nil, boom + }, + }} + + resp, err := h.getCommitmentOptions(context.Background()) + + require.ErrorIs(t, err, boom) + assert.Nil(t, resp) +} + +func TestGetCommitmentOptions_EmptyAWS_CollapsesToUnavailable(t *testing.T) { + // Probe succeeded but returned nothing for AWS. Treating this as + // unavailable keeps the frontend on its hardcoded fallback rather than + // rendering an empty constraint set. + h := &Handler{commitmentOpts: &stubCommitmentOpts{ + getFn: func(context.Context) (commitmentopts.Options, error) { + return commitmentopts.Options{"aws": map[string][]commitmentopts.Combo{}}, nil + }, + }} + + resp, err := h.getCommitmentOptions(context.Background()) + + require.NoError(t, err) + assert.Equal(t, "unavailable", resp.Status) + assert.Nil(t, resp.AWS) +} + +func TestGetCommitmentOptions_Success_ReturnsAWSCombos(t *testing.T) { + h := &Handler{commitmentOpts: &stubCommitmentOpts{ + getFn: func(context.Context) (commitmentopts.Options, error) { + return commitmentopts.Options{ + "aws": map[string][]commitmentopts.Combo{ + "rds": { + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "partial-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 3, Payment: "all-upfront"}, + }, + "elasticache": { + {Provider: "aws", Service: "elasticache", TermYears: 1, Payment: "no-upfront"}, + {Provider: "aws", Service: "elasticache", TermYears: 3, Payment: "no-upfront"}, + }, + }, + }, nil + }, + }} + + resp, err := h.getCommitmentOptions(context.Background()) + + require.NoError(t, err) + assert.Equal(t, "ok", resp.Status) + assert.Len(t, resp.AWS["rds"], 3) + assert.Len(t, resp.AWS["elasticache"], 2) + // Spot-check the payload shape: term/payment only, no provider/service echo. + assert.Contains(t, resp.AWS["elasticache"], commitmentOptionCombo{Term: 3, Payment: "no-upfront"}) +} diff --git a/internal/api/handler_config.go b/internal/api/handler_config.go index ffe62de4..2a184e9a 100644 --- a/internal/api/handler_config.go +++ b/internal/api/handler_config.go @@ -82,6 +82,31 @@ func (h *Handler) updateConfig(ctx context.Context, req *events.LambdaFunctionUR return &StatusResponse{Status: "updated"}, nil } +// checkCommitmentOptionCombo rejects saves that carry a (term, payment) +// combination we've dynamically confirmed the cloud doesn't sell. Returns +// nil when: no probe service is wired, the service hasn't persisted data +// yet (absent data → fall through to the frontend's hardcoded rules), +// the save isn't AWS, or the combo is valid. Errors from Validate are +// logged and swallowed (permissive) so a transient DB blip never blocks +// a settings save. +func (h *Handler) checkCommitmentOptionCombo(ctx context.Context, cfg config.ServiceConfig) error { + if h.commitmentOpts == nil || cfg.Provider != "aws" || cfg.Term <= 0 || cfg.Payment == "" { + return nil + } + ok, err := h.commitmentOpts.Validate(ctx, cfg.Provider, cfg.Service, cfg.Term, cfg.Payment) + if err != nil { + logging.Warnf("commitment-option validation error (allowing save): %v", err) + return nil + } + if !ok { + return NewClientError(400, fmt.Sprintf( + "%s does not support %dyr %s commitments", + cfg.Service, cfg.Term, cfg.Payment, + )) + } + return nil +} + // mergeServiceConfig loads any existing service config and overlays the four // UI-editable fields (Enabled, Term, Payment, Coverage) from cfg onto it, so // that filter fields set outside the UI (RampSchedule, IncludeEngines, etc.) @@ -172,6 +197,10 @@ func (h *Handler) updateServiceConfig(ctx context.Context, req *events.LambdaFun return nil, NewClientError(400, fmt.Sprintf("validation error: %s", err)) } + if err := h.checkCommitmentOptionCombo(ctx, cfg); err != nil { + return nil, err + } + if err := h.config.SaveServiceConfig(ctx, &cfg); err != nil { return nil, err } diff --git a/internal/api/handler_config_test.go b/internal/api/handler_config_test.go index 8acf8522..feb8cf55 100644 --- a/internal/api/handler_config_test.go +++ b/internal/api/handler_config_test.go @@ -202,6 +202,86 @@ func TestHandler_updateServiceConfig_InvalidBody(t *testing.T) { assert.Contains(t, err.Error(), "invalid request body") } +func TestHandler_updateServiceConfig_CommitmentOptsReject(t *testing.T) { + ctx := context.Background() + mockStore := new(MockConfigStore) + mockAuth := new(MockAuthService) + + adminSession := &Session{ + UserID: "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", + Email: "admin@example.com", + Role: "admin", + } + mockAuth.On("ValidateSession", ctx, "admin-token").Return(adminSession, nil) + mockStore.On("GetServiceConfig", ctx, "aws", "rds").Return(nil, nil) + + // Probe data says RDS 3yr no-upfront doesn't exist. Save must 400. + // SaveServiceConfig is NOT set up — asserting it's never called. + handler := &Handler{ + config: mockStore, + auth: mockAuth, + commitmentOpts: &stubCommitmentOpts{ + validateFn: func(_ context.Context, provider, service string, term int, payment string) (bool, error) { + assert.Equal(t, "aws", provider) + assert.Equal(t, "rds", service) + assert.Equal(t, 3, term) + assert.Equal(t, "no-upfront", payment) + return false, nil + }, + }, + } + + body := `{"enabled": true, "term": 3, "payment": "no-upfront", "coverage": 80}` + req := &events.LambdaFunctionURLRequest{ + Headers: map[string]string{"Authorization": "Bearer admin-token"}, + Body: body, + } + result, err := handler.updateServiceConfig(ctx, req, "aws/rds") + + require.Error(t, err) + assert.Nil(t, result) + ce, ok := IsClientError(err) + require.True(t, ok) + assert.Equal(t, 400, ce.code) + assert.Contains(t, ce.message, "3yr no-upfront") + mockStore.AssertNotCalled(t, "SaveServiceConfig", mock.Anything, mock.Anything) +} + +func TestHandler_updateServiceConfig_CommitmentOptsAccept(t *testing.T) { + ctx := context.Background() + mockStore := new(MockConfigStore) + mockAuth := new(MockAuthService) + + adminSession := &Session{ + UserID: "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", + Email: "admin@example.com", + Role: "admin", + } + mockAuth.On("ValidateSession", ctx, "admin-token").Return(adminSession, nil) + mockStore.On("GetServiceConfig", ctx, "aws", "rds").Return(nil, nil) + mockStore.On("SaveServiceConfig", ctx, mock.AnythingOfType("*config.ServiceConfig")).Return(nil) + + handler := &Handler{ + config: mockStore, + auth: mockAuth, + commitmentOpts: &stubCommitmentOpts{ + validateFn: func(context.Context, string, string, int, string) (bool, error) { + return true, nil + }, + }, + } + + body := `{"enabled": true, "term": 1, "payment": "all-upfront", "coverage": 80}` + req := &events.LambdaFunctionURLRequest{ + Headers: map[string]string{"Authorization": "Bearer admin-token"}, + Body: body, + } + result, err := handler.updateServiceConfig(ctx, req, "aws/rds") + + require.NoError(t, err) + assert.Equal(t, "updated", result.Status) +} + func TestHandler_updateServiceConfig_NoSlash(t *testing.T) { ctx := context.Background() mockStore := new(MockConfigStore) diff --git a/internal/api/router.go b/internal/api/router.go index bc26e908..6a514531 100644 --- a/internal/api/router.go +++ b/internal/api/router.go @@ -70,6 +70,11 @@ func (r *Router) registerRoutes() { {PathPrefix: "/api/config/service/", Method: "GET", Handler: r.getServiceConfigHandler}, {PathPrefix: "/api/config/service/", Method: "PUT", Handler: r.updateServiceConfigHandler}, + // Dynamically-probed AWS commitment-option combos. Non-admin reads + // (AuthUser) — data is public-ish (hardcoded in the frontend today) + // but we don't expose it unauthenticated. + {ExactPath: "/api/commitment-options", Method: "GET", Handler: r.commitmentOptionsHandler, Auth: AuthUser}, + // Recommendations endpoints {ExactPath: "/api/recommendations", Method: "GET", Handler: r.getRecommendationsHandler}, {ExactPath: "/api/recommendations/freshness", Method: "GET", Handler: r.getRecommendationsFreshnessHandler}, @@ -293,6 +298,10 @@ func (r *Router) updateServiceConfigHandler(ctx context.Context, req *events.Lam return r.h.updateServiceConfig(ctx, req, params["id"]) } +func (r *Router) commitmentOptionsHandler(ctx context.Context, req *events.LambdaFunctionURLRequest, params map[string]string) (any, error) { + return r.h.getCommitmentOptions(ctx) +} + func (r *Router) getRecommendationsHandler(ctx context.Context, req *events.LambdaFunctionURLRequest, params map[string]string) (any, error) { return r.h.getRecommendations(ctx, req, req.QueryStringParameters) } diff --git a/internal/api/types.go b/internal/api/types.go index 0609780d..ea92a04c 100644 --- a/internal/api/types.go +++ b/internal/api/types.go @@ -6,6 +6,7 @@ import ( "sync" "time" + "github.com/LeanerCloud/CUDly/internal/commitmentopts" "github.com/LeanerCloud/CUDly/internal/config" "github.com/LeanerCloud/CUDly/internal/credentials" "github.com/LeanerCloud/CUDly/internal/email" @@ -68,6 +69,18 @@ type HandlerConfig struct { // publish in the Discovery document. Must match what Azure AD // federated credentials are registered with. OIDCIssuerURL string + // CommitmentOpts discovers which (term, payment) combinations each + // AWS service actually sells and validates saves against that data. + // Nil disables both the /api/commitment-options endpoint (returns + // unavailable) and save-side validation in updateServiceConfig. + CommitmentOpts CommitmentOptsInterface +} + +// CommitmentOptsInterface lets us swap the real *commitmentopts.Service for +// a stub in handler tests without pulling in the probe+store machinery. +type CommitmentOptsInterface interface { + Get(ctx context.Context) (commitmentopts.Options, error) + Validate(ctx context.Context, provider, service string, term int, payment string) (bool, error) } // AnalyticsClientInterface defines the interface for analytics queries diff --git a/internal/commitmentopts/normalize.go b/internal/commitmentopts/normalize.go new file mode 100644 index 00000000..05aa1907 --- /dev/null +++ b/internal/commitmentopts/normalize.go @@ -0,0 +1,78 @@ +package commitmentopts + +import "strings" + +// oneYearSeconds and threeYearSeconds are the only reservation durations +// AWS currently sells across the services we probe. Anything else (90-day +// heavy-utilization RIs, 5-year Redshift offerings on the rare occasion +// they exist) is dropped — the frontend only exposes 1yr/3yr in the UI so +// other durations have no place to surface. +const ( + oneYearSeconds int64 = 31536000 + threeYearSeconds int64 = 94608000 +) + +// durationToTerm maps a duration-in-seconds (as returned by the AWS +// Describe*Offerings APIs) to a term in whole years. Returns (0, false) +// for anything outside {1yr, 3yr}; callers drop those Combos. +func durationToTerm(seconds int64) (int, bool) { + switch seconds { + case oneYearSeconds: + return 1, true + case threeYearSeconds: + return 3, true + default: + return 0, false + } +} + +// normalizePayment canonicalizes the messy zoo of payment-option spellings +// AWS uses across services into one of our three tokens. Input may be: +// +// - "All Upfront" / "Partial Upfront" / "No Upfront" +// (RDS/ElastiCache/Redshift/MemoryDB OfferingType strings) +// - "ALL_UPFRONT" / "PARTIAL_UPFRONT" / "NO_UPFRONT" +// (OpenSearch's ReservedInstancePaymentOption enum stringer) +// - already-canonical "all-upfront" etc. +// +// It explicitly rejects legacy pre-2011 ElastiCache/EC2 utilization tokens +// ("Light Utilization" / "Medium Utilization" / "Heavy Utilization") — those +// predate the modern (term, payment) model and there is no sensible mapping +// into it. +func normalizePayment(raw string) (string, bool) { + token := stripNonAlnumLower(raw) + switch token { + case "allupfront": + return "all-upfront", true + case "partialupfront": + return "partial-upfront", true + case "noupfront": + return "no-upfront", true + case "lightutilization", "mediumutilization", "heavyutilization": + // Legacy utilization-based offerings — deliberately rejected. + return "", false + default: + return "", false + } +} + +// stripNonAlnumLower returns s lowercased with every non-alphanumeric +// character removed. It is the canonical form used to compare the many +// payment-option spellings AWS APIs emit. +func stripNonAlnumLower(s string) string { + var b strings.Builder + b.Grow(len(s)) + for _, r := range s { + switch { + case r >= 'A' && r <= 'Z': + b.WriteRune(r + ('a' - 'A')) + case r >= 'a' && r <= 'z': + b.WriteRune(r) + case r >= '0' && r <= '9': + b.WriteRune(r) + default: + // Drop spaces, hyphens, underscores, and everything else. + } + } + return b.String() +} diff --git a/internal/commitmentopts/normalize_test.go b/internal/commitmentopts/normalize_test.go new file mode 100644 index 00000000..7053775e --- /dev/null +++ b/internal/commitmentopts/normalize_test.go @@ -0,0 +1,94 @@ +package commitmentopts + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDurationToTerm(t *testing.T) { + cases := []struct { + name string + seconds int64 + term int + ok bool + }{ + {"1yr", 31536000, 1, true}, + {"3yr", 94608000, 3, true}, + {"zero", 0, 0, false}, + {"negative", -1, 0, false}, + {"5yr", 5 * 31536000, 0, false}, + {"90 days", 90 * 86400, 0, false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + term, ok := durationToTerm(tc.seconds) + assert.Equal(t, tc.term, term) + assert.Equal(t, tc.ok, ok) + }) + } +} + +func TestNormalizePayment(t *testing.T) { + cases := []struct { + name string + in string + out string + ok bool + }{ + // SDK-typed string variants ("All Upfront" / ...). + {"rds All Upfront", "All Upfront", "all-upfront", true}, + {"rds Partial Upfront", "Partial Upfront", "partial-upfront", true}, + {"rds No Upfront", "No Upfront", "no-upfront", true}, + + // OpenSearch enum stringer variant (ALL_UPFRONT). + {"enum ALL_UPFRONT", "ALL_UPFRONT", "all-upfront", true}, + {"enum PARTIAL_UPFRONT", "PARTIAL_UPFRONT", "partial-upfront", true}, + {"enum NO_UPFRONT", "NO_UPFRONT", "no-upfront", true}, + + // Already-canonical lowercase-kebab form. + {"canonical all-upfront", "all-upfront", "all-upfront", true}, + {"canonical partial-upfront", "partial-upfront", "partial-upfront", true}, + {"canonical no-upfront", "no-upfront", "no-upfront", true}, + + // Mixed-case / weird spacing — still normalizes. + {"mixed case", "aLl UpFrOnT", "all-upfront", true}, + {"leading/trailing whitespace", " All Upfront ", "all-upfront", true}, + {"underscore lowercase", "all_upfront", "all-upfront", true}, + + // Legacy utilization-based offerings must be rejected. + {"light utilization", "Light Utilization", "", false}, + {"medium utilization", "Medium Utilization", "", false}, + {"heavy utilization", "Heavy Utilization", "", false}, + {"light lowercase", "lightutilization", "", false}, + + // Unknown payment option — dropped. + {"unknown", "Something Else", "", false}, + {"empty", "", "", false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, ok := normalizePayment(tc.in) + assert.Equal(t, tc.out, got) + assert.Equal(t, tc.ok, ok) + }) + } +} + +func TestStripNonAlnumLower(t *testing.T) { + cases := []struct { + in string + out string + }{ + {"All Upfront", "allupfront"}, + {"ALL_UPFRONT", "allupfront"}, + {" all-upfront ", "allupfront"}, + {"a1b2c3", "a1b2c3"}, + {"", ""}, + } + for _, tc := range cases { + t.Run(tc.in, func(t *testing.T) { + assert.Equal(t, tc.out, stripNonAlnumLower(tc.in)) + }) + } +} diff --git a/internal/commitmentopts/probe.go b/internal/commitmentopts/probe.go new file mode 100644 index 00000000..b293a65f --- /dev/null +++ b/internal/commitmentopts/probe.go @@ -0,0 +1,426 @@ +package commitmentopts + +import ( + "context" + "fmt" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/aws/aws-sdk-go-v2/service/elasticache" + "github.com/aws/aws-sdk-go-v2/service/memorydb" + "github.com/aws/aws-sdk-go-v2/service/opensearch" + "github.com/aws/aws-sdk-go-v2/service/rds" + "github.com/aws/aws-sdk-go-v2/service/redshift" +) + +// maxPages is the hard ceiling on paginated Describe*Offerings calls per +// probe. We only need unique (duration, payment) tuples, which saturate +// well before 5 pages of 100 offerings each. The cap bounds worst-case API +// spend if an SDK change ever breaks pagination detection. +const maxPages = 5 + +// pageSize is the MaxRecords/MaxResults value we request. 100 is the +// documented maximum for every AWS Describe*Offerings API we call. +const pageSize int32 = 100 + +// Canonical probe targets. Picked per service so "offerings exist" is +// guaranteed in every commercial region — small/cheap instance types with +// long-standing public availability. The targets never round-trip through +// a purchase so their cost is irrelevant; what matters is that AWS +// actually has offerings to return for them. +const ( + probeTargetRDS = "db.t3.micro" + probeTargetElastiCache = "cache.t3.micro" + probeTargetOpenSearch = "t3.small.search" + probeTargetRedshift = "dc2.large" + probeTargetMemoryDB = "db.t4g.small" + probeTargetEC2 = "t3.micro" +) + +// collect dedupes a probe's raw (durationSeconds, rawPayment) pairs, runs +// both normalizers, and builds the Combo slice. Duplicates — a single +// (term, payment) tuple appears once per instance size × AZ × engine +// variant — are collapsed so the caller sees at most six Combos per +// service (2 terms × 3 payments). +func collect(service string, raw []rawOffer) []Combo { + type key struct { + term int + payment string + } + seen := make(map[key]struct{}, len(raw)) + out := make([]Combo, 0, len(raw)) + for _, r := range raw { + term, ok := durationToTerm(r.durationSeconds) + if !ok { + continue + } + payment, ok := normalizePayment(r.payment) + if !ok { + continue + } + k := key{term: term, payment: payment} + if _, dup := seen[k]; dup { + continue + } + seen[k] = struct{}{} + out = append(out, Combo{ + Provider: "aws", + Service: service, + TermYears: term, + Payment: payment, + }) + } + return out +} + +// rawOffer is the pre-normalization shape every per-service probe feeds +// into collect(). Keeping the shape uniform means normalization lives in +// exactly one place. +type rawOffer struct { + durationSeconds int64 + payment string +} + +// --------------------------------------------------------------------------- +// RDS +// --------------------------------------------------------------------------- + +// RDSDescribeOfferings is the minimal RDS surface the probe needs. It +// matches the method signature on the generated client so tests can +// substitute a mock without dragging in the full RDSAPI interface. +type RDSDescribeOfferings interface { + DescribeReservedDBInstancesOfferings(ctx context.Context, params *rds.DescribeReservedDBInstancesOfferingsInput, optFns ...func(*rds.Options)) (*rds.DescribeReservedDBInstancesOfferingsOutput, error) +} + +// RDSProber probes rds:DescribeReservedDBInstancesOfferings. +type RDSProber struct { + // NewClient builds a client from the probe's aws.Config. Override in + // tests to return a mock. + NewClient func(cfg aws.Config) RDSDescribeOfferings +} + +// Service returns "rds". +func (p *RDSProber) Service() string { return "rds" } + +// Probe returns the normalized (term, payment) combos RDS currently sells +// against db.t3.micro. +func (p *RDSProber) Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) { + client := p.client(cfg) + var raw []rawOffer + var marker *string + for page := 0; page < maxPages; page++ { + out, err := client.DescribeReservedDBInstancesOfferings(ctx, &rds.DescribeReservedDBInstancesOfferingsInput{ + DBInstanceClass: aws.String(probeTargetRDS), + MaxRecords: aws.Int32(pageSize), + Marker: marker, + }) + if err != nil { + return nil, fmt.Errorf("rds: %w", err) + } + for _, o := range out.ReservedDBInstancesOfferings { + raw = append(raw, rawOffer{ + durationSeconds: int64(aws.ToInt32(o.Duration)), + payment: aws.ToString(o.OfferingType), + }) + } + if out.Marker == nil || aws.ToString(out.Marker) == "" { + break + } + marker = out.Marker + } + return collect(p.Service(), raw), nil +} + +func (p *RDSProber) client(cfg aws.Config) RDSDescribeOfferings { + if p.NewClient != nil { + return p.NewClient(cfg) + } + return rds.NewFromConfig(cfg) +} + +// --------------------------------------------------------------------------- +// ElastiCache +// --------------------------------------------------------------------------- + +// ElastiCacheDescribeOfferings is the minimal ElastiCache surface we use. +type ElastiCacheDescribeOfferings interface { + DescribeReservedCacheNodesOfferings(ctx context.Context, params *elasticache.DescribeReservedCacheNodesOfferingsInput, optFns ...func(*elasticache.Options)) (*elasticache.DescribeReservedCacheNodesOfferingsOutput, error) +} + +// ElastiCacheProber probes elasticache:DescribeReservedCacheNodesOfferings. +type ElastiCacheProber struct { + NewClient func(cfg aws.Config) ElastiCacheDescribeOfferings +} + +// Service returns "elasticache". +func (p *ElastiCacheProber) Service() string { return "elasticache" } + +// Probe returns the combos for cache.t3.micro. +func (p *ElastiCacheProber) Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) { + client := p.client(cfg) + var raw []rawOffer + var marker *string + for page := 0; page < maxPages; page++ { + out, err := client.DescribeReservedCacheNodesOfferings(ctx, &elasticache.DescribeReservedCacheNodesOfferingsInput{ + CacheNodeType: aws.String(probeTargetElastiCache), + MaxRecords: aws.Int32(pageSize), + Marker: marker, + }) + if err != nil { + return nil, fmt.Errorf("elasticache: %w", err) + } + for _, o := range out.ReservedCacheNodesOfferings { + raw = append(raw, rawOffer{ + durationSeconds: int64(aws.ToInt32(o.Duration)), + payment: aws.ToString(o.OfferingType), + }) + } + if out.Marker == nil || aws.ToString(out.Marker) == "" { + break + } + marker = out.Marker + } + return collect(p.Service(), raw), nil +} + +func (p *ElastiCacheProber) client(cfg aws.Config) ElastiCacheDescribeOfferings { + if p.NewClient != nil { + return p.NewClient(cfg) + } + return elasticache.NewFromConfig(cfg) +} + +// --------------------------------------------------------------------------- +// OpenSearch +// --------------------------------------------------------------------------- + +// OpenSearchDescribeOfferings is the minimal OpenSearch surface we use. +// The OpenSearch API has no per-instance-type filter on this endpoint, so +// the probe filters client-side after fetching. +type OpenSearchDescribeOfferings interface { + DescribeReservedInstanceOfferings(ctx context.Context, params *opensearch.DescribeReservedInstanceOfferingsInput, optFns ...func(*opensearch.Options)) (*opensearch.DescribeReservedInstanceOfferingsOutput, error) +} + +// OpenSearchProber probes opensearch:DescribeReservedInstanceOfferings. +type OpenSearchProber struct { + NewClient func(cfg aws.Config) OpenSearchDescribeOfferings +} + +// Service returns "opensearch". +func (p *OpenSearchProber) Service() string { return "opensearch" } + +// Probe returns the combos for t3.small.search. +func (p *OpenSearchProber) Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) { + client := p.client(cfg) + var raw []rawOffer + var nextToken *string + for page := 0; page < maxPages; page++ { + out, err := client.DescribeReservedInstanceOfferings(ctx, &opensearch.DescribeReservedInstanceOfferingsInput{ + MaxResults: pageSize, + NextToken: nextToken, + }) + if err != nil { + return nil, fmt.Errorf("opensearch: %w", err) + } + for _, o := range out.ReservedInstanceOfferings { + if string(o.InstanceType) != probeTargetOpenSearch { + continue + } + raw = append(raw, rawOffer{ + durationSeconds: int64(o.Duration), + payment: string(o.PaymentOption), + }) + } + if out.NextToken == nil || aws.ToString(out.NextToken) == "" { + break + } + nextToken = out.NextToken + } + return collect(p.Service(), raw), nil +} + +func (p *OpenSearchProber) client(cfg aws.Config) OpenSearchDescribeOfferings { + if p.NewClient != nil { + return p.NewClient(cfg) + } + return opensearch.NewFromConfig(cfg) +} + +// --------------------------------------------------------------------------- +// Redshift +// --------------------------------------------------------------------------- + +// RedshiftDescribeOfferings is the minimal Redshift surface we use. The +// API has no NodeType filter on DescribeReservedNodeOfferings so the probe +// filters client-side. +type RedshiftDescribeOfferings interface { + DescribeReservedNodeOfferings(ctx context.Context, params *redshift.DescribeReservedNodeOfferingsInput, optFns ...func(*redshift.Options)) (*redshift.DescribeReservedNodeOfferingsOutput, error) +} + +// RedshiftProber probes redshift:DescribeReservedNodeOfferings. +type RedshiftProber struct { + NewClient func(cfg aws.Config) RedshiftDescribeOfferings +} + +// Service returns "redshift". +func (p *RedshiftProber) Service() string { return "redshift" } + +// Probe returns the combos for dc2.large. +func (p *RedshiftProber) Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) { + client := p.client(cfg) + var raw []rawOffer + var marker *string + for page := 0; page < maxPages; page++ { + out, err := client.DescribeReservedNodeOfferings(ctx, &redshift.DescribeReservedNodeOfferingsInput{ + MaxRecords: aws.Int32(pageSize), + Marker: marker, + }) + if err != nil { + return nil, fmt.Errorf("redshift: %w", err) + } + for _, o := range out.ReservedNodeOfferings { + if aws.ToString(o.NodeType) != probeTargetRedshift { + continue + } + raw = append(raw, rawOffer{ + durationSeconds: int64(aws.ToInt32(o.Duration)), + payment: aws.ToString(o.OfferingType), + }) + } + if out.Marker == nil || aws.ToString(out.Marker) == "" { + break + } + marker = out.Marker + } + return collect(p.Service(), raw), nil +} + +func (p *RedshiftProber) client(cfg aws.Config) RedshiftDescribeOfferings { + if p.NewClient != nil { + return p.NewClient(cfg) + } + return redshift.NewFromConfig(cfg) +} + +// --------------------------------------------------------------------------- +// MemoryDB +// --------------------------------------------------------------------------- + +// MemoryDBDescribeOfferings is the minimal MemoryDB surface we use. +type MemoryDBDescribeOfferings interface { + DescribeReservedNodesOfferings(ctx context.Context, params *memorydb.DescribeReservedNodesOfferingsInput, optFns ...func(*memorydb.Options)) (*memorydb.DescribeReservedNodesOfferingsOutput, error) +} + +// MemoryDBProber probes memorydb:DescribeReservedNodesOfferings. +type MemoryDBProber struct { + NewClient func(cfg aws.Config) MemoryDBDescribeOfferings +} + +// Service returns "memorydb". +func (p *MemoryDBProber) Service() string { return "memorydb" } + +// Probe returns the combos for db.t4g.small. +func (p *MemoryDBProber) Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) { + client := p.client(cfg) + var raw []rawOffer + var nextToken *string + for page := 0; page < maxPages; page++ { + out, err := client.DescribeReservedNodesOfferings(ctx, &memorydb.DescribeReservedNodesOfferingsInput{ + NodeType: aws.String(probeTargetMemoryDB), + MaxResults: aws.Int32(pageSize), + NextToken: nextToken, + }) + if err != nil { + return nil, fmt.Errorf("memorydb: %w", err) + } + for _, o := range out.ReservedNodesOfferings { + raw = append(raw, rawOffer{ + durationSeconds: int64(o.Duration), + payment: aws.ToString(o.OfferingType), + }) + } + if out.NextToken == nil || aws.ToString(out.NextToken) == "" { + break + } + nextToken = out.NextToken + } + return collect(p.Service(), raw), nil +} + +func (p *MemoryDBProber) client(cfg aws.Config) MemoryDBDescribeOfferings { + if p.NewClient != nil { + return p.NewClient(cfg) + } + return memorydb.NewFromConfig(cfg) +} + +// --------------------------------------------------------------------------- +// EC2 +// --------------------------------------------------------------------------- + +// EC2DescribeOfferings is the minimal EC2 surface we use. +type EC2DescribeOfferings interface { + DescribeReservedInstancesOfferings(ctx context.Context, params *ec2.DescribeReservedInstancesOfferingsInput, optFns ...func(*ec2.Options)) (*ec2.DescribeReservedInstancesOfferingsOutput, error) +} + +// EC2Prober probes ec2:DescribeReservedInstancesOfferings. +type EC2Prober struct { + NewClient func(cfg aws.Config) EC2DescribeOfferings +} + +// Service returns "ec2". +func (p *EC2Prober) Service() string { return "ec2" } + +// Probe returns the combos for t3.micro. IncludeMarketplace is explicitly +// false so we only see AWS-native (standard/convertible) offerings — the +// Marketplace resale market has arbitrary durations that would pollute +// normalization. +func (p *EC2Prober) Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) { + client := p.client(cfg) + var raw []rawOffer + var nextToken *string + for page := 0; page < maxPages; page++ { + out, err := client.DescribeReservedInstancesOfferings(ctx, &ec2.DescribeReservedInstancesOfferingsInput{ + InstanceType: ec2types.InstanceType(probeTargetEC2), + IncludeMarketplace: aws.Bool(false), + MaxResults: aws.Int32(pageSize), + NextToken: nextToken, + }) + if err != nil { + return nil, fmt.Errorf("ec2: %w", err) + } + for _, o := range out.ReservedInstancesOfferings { + raw = append(raw, rawOffer{ + durationSeconds: aws.ToInt64(o.Duration), + payment: string(o.OfferingType), + }) + } + if out.NextToken == nil || aws.ToString(out.NextToken) == "" { + break + } + nextToken = out.NextToken + } + return collect(p.Service(), raw), nil +} + +func (p *EC2Prober) client(cfg aws.Config) EC2DescribeOfferings { + if p.NewClient != nil { + return p.NewClient(cfg) + } + return ec2.NewFromConfig(cfg) +} + +// DefaultProbers returns one prober instance per commitment-capable +// service. The Service wires these up by default; tests override via a +// custom slice. +func DefaultProbers() []Prober { + return []Prober{ + &RDSProber{}, + &ElastiCacheProber{}, + &OpenSearchProber{}, + &RedshiftProber{}, + &MemoryDBProber{}, + &EC2Prober{}, + } +} diff --git a/internal/commitmentopts/probe_test.go b/internal/commitmentopts/probe_test.go new file mode 100644 index 00000000..fd66dba7 --- /dev/null +++ b/internal/commitmentopts/probe_test.go @@ -0,0 +1,346 @@ +package commitmentopts + +import ( + "context" + "errors" + "sort" + "testing" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/aws/aws-sdk-go-v2/service/elasticache" + elasticachetypes "github.com/aws/aws-sdk-go-v2/service/elasticache/types" + "github.com/aws/aws-sdk-go-v2/service/memorydb" + memorydbtypes "github.com/aws/aws-sdk-go-v2/service/memorydb/types" + "github.com/aws/aws-sdk-go-v2/service/opensearch" + opensearchtypes "github.com/aws/aws-sdk-go-v2/service/opensearch/types" + "github.com/aws/aws-sdk-go-v2/service/rds" + rdstypes "github.com/aws/aws-sdk-go-v2/service/rds/types" + "github.com/aws/aws-sdk-go-v2/service/redshift" + redshifttypes "github.com/aws/aws-sdk-go-v2/service/redshift/types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// sortCombos gives tests a stable comparison order — probers dedupe by +// map keys so iteration order is otherwise nondeterministic. +func sortCombos(c []Combo) []Combo { + sort.Slice(c, func(i, j int) bool { + if c[i].TermYears != c[j].TermYears { + return c[i].TermYears < c[j].TermYears + } + return c[i].Payment < c[j].Payment + }) + return c +} + +// --------------------------------------------------------------------------- +// RDS +// --------------------------------------------------------------------------- + +type fakeRDS struct { + fn func(*rds.DescribeReservedDBInstancesOfferingsInput) (*rds.DescribeReservedDBInstancesOfferingsOutput, error) +} + +func (f *fakeRDS) DescribeReservedDBInstancesOfferings(ctx context.Context, in *rds.DescribeReservedDBInstancesOfferingsInput, _ ...func(*rds.Options)) (*rds.DescribeReservedDBInstancesOfferingsOutput, error) { + return f.fn(in) +} + +func TestRDSProber_Probe(t *testing.T) { + fake := &fakeRDS{ + fn: func(in *rds.DescribeReservedDBInstancesOfferingsInput) (*rds.DescribeReservedDBInstancesOfferingsOutput, error) { + assert.Equal(t, probeTargetRDS, aws.ToString(in.DBInstanceClass)) + return &rds.DescribeReservedDBInstancesOfferingsOutput{ + ReservedDBInstancesOfferings: []rdstypes.ReservedDBInstancesOffering{ + {Duration: aws.Int32(31536000), OfferingType: aws.String("All Upfront")}, + {Duration: aws.Int32(31536000), OfferingType: aws.String("Partial Upfront")}, + {Duration: aws.Int32(31536000), OfferingType: aws.String("No Upfront")}, + {Duration: aws.Int32(94608000), OfferingType: aws.String("All Upfront")}, + // dup of a 1yr All Upfront — must collapse + {Duration: aws.Int32(31536000), OfferingType: aws.String("All Upfront")}, + // anomalies + {Duration: aws.Int32(94608000), OfferingType: aws.String("Light Utilization")}, + {Duration: aws.Int32(5 * 31536000), OfferingType: aws.String("All Upfront")}, + {Duration: aws.Int32(31536000), OfferingType: aws.String("Mystery Option")}, + }, + }, nil + }, + } + p := &RDSProber{NewClient: func(cfg aws.Config) RDSDescribeOfferings { return fake }} + + assert.Equal(t, "rds", p.Service()) + + got, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + got = sortCombos(got) + want := []Combo{ + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "no-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "partial-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 3, Payment: "all-upfront"}, + } + assert.Equal(t, want, got) +} + +func TestRDSProber_ErrorPropagates(t *testing.T) { + boom := errors.New("boom") + fake := &fakeRDS{fn: func(*rds.DescribeReservedDBInstancesOfferingsInput) (*rds.DescribeReservedDBInstancesOfferingsOutput, error) { + return nil, boom + }} + p := &RDSProber{NewClient: func(cfg aws.Config) RDSDescribeOfferings { return fake }} + _, err := p.Probe(context.Background(), aws.Config{}) + require.Error(t, err) + assert.ErrorIs(t, err, boom) +} + +func TestRDSProber_PageCap(t *testing.T) { + // Every page returns a non-empty marker; the probe must stop after + // maxPages to bound API spend. + calls := 0 + fake := &fakeRDS{fn: func(*rds.DescribeReservedDBInstancesOfferingsInput) (*rds.DescribeReservedDBInstancesOfferingsOutput, error) { + calls++ + return &rds.DescribeReservedDBInstancesOfferingsOutput{ + Marker: aws.String("more"), + }, nil + }} + p := &RDSProber{NewClient: func(cfg aws.Config) RDSDescribeOfferings { return fake }} + _, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + assert.Equal(t, maxPages, calls) +} + +// --------------------------------------------------------------------------- +// ElastiCache +// --------------------------------------------------------------------------- + +type fakeElastiCache struct { + fn func(*elasticache.DescribeReservedCacheNodesOfferingsInput) (*elasticache.DescribeReservedCacheNodesOfferingsOutput, error) +} + +func (f *fakeElastiCache) DescribeReservedCacheNodesOfferings(ctx context.Context, in *elasticache.DescribeReservedCacheNodesOfferingsInput, _ ...func(*elasticache.Options)) (*elasticache.DescribeReservedCacheNodesOfferingsOutput, error) { + return f.fn(in) +} + +func TestElastiCacheProber_Probe(t *testing.T) { + fake := &fakeElastiCache{ + fn: func(in *elasticache.DescribeReservedCacheNodesOfferingsInput) (*elasticache.DescribeReservedCacheNodesOfferingsOutput, error) { + assert.Equal(t, probeTargetElastiCache, aws.ToString(in.CacheNodeType)) + return &elasticache.DescribeReservedCacheNodesOfferingsOutput{ + ReservedCacheNodesOfferings: []elasticachetypes.ReservedCacheNodesOffering{ + {Duration: aws.Int32(31536000), OfferingType: aws.String("All Upfront")}, + {Duration: aws.Int32(94608000), OfferingType: aws.String("No Upfront")}, + // legacy utilization-style — must be dropped + {Duration: aws.Int32(94608000), OfferingType: aws.String("Heavy Utilization")}, + }, + }, nil + }, + } + p := &ElastiCacheProber{NewClient: func(cfg aws.Config) ElastiCacheDescribeOfferings { return fake }} + assert.Equal(t, "elasticache", p.Service()) + + got, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + got = sortCombos(got) + want := []Combo{ + {Provider: "aws", Service: "elasticache", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "elasticache", TermYears: 3, Payment: "no-upfront"}, + } + assert.Equal(t, want, got) +} + +// --------------------------------------------------------------------------- +// OpenSearch +// --------------------------------------------------------------------------- + +type fakeOpenSearch struct { + fn func(*opensearch.DescribeReservedInstanceOfferingsInput) (*opensearch.DescribeReservedInstanceOfferingsOutput, error) +} + +func (f *fakeOpenSearch) DescribeReservedInstanceOfferings(ctx context.Context, in *opensearch.DescribeReservedInstanceOfferingsInput, _ ...func(*opensearch.Options)) (*opensearch.DescribeReservedInstanceOfferingsOutput, error) { + return f.fn(in) +} + +func TestOpenSearchProber_Probe(t *testing.T) { + fake := &fakeOpenSearch{ + fn: func(in *opensearch.DescribeReservedInstanceOfferingsInput) (*opensearch.DescribeReservedInstanceOfferingsOutput, error) { + return &opensearch.DescribeReservedInstanceOfferingsOutput{ + ReservedInstanceOfferings: []opensearchtypes.ReservedInstanceOffering{ + { + InstanceType: opensearchtypes.OpenSearchPartitionInstanceType(probeTargetOpenSearch), + Duration: 31536000, + PaymentOption: opensearchtypes.ReservedInstancePaymentOptionAllUpfront, + }, + { + InstanceType: opensearchtypes.OpenSearchPartitionInstanceType(probeTargetOpenSearch), + Duration: 94608000, + PaymentOption: opensearchtypes.ReservedInstancePaymentOptionPartialUpfront, + }, + // off-instance-type — must be filtered out client-side + { + InstanceType: opensearchtypes.OpenSearchPartitionInstanceType("r6g.large.search"), + Duration: 31536000, + PaymentOption: opensearchtypes.ReservedInstancePaymentOptionNoUpfront, + }, + }, + }, nil + }, + } + p := &OpenSearchProber{NewClient: func(cfg aws.Config) OpenSearchDescribeOfferings { return fake }} + assert.Equal(t, "opensearch", p.Service()) + + got, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + got = sortCombos(got) + want := []Combo{ + {Provider: "aws", Service: "opensearch", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "opensearch", TermYears: 3, Payment: "partial-upfront"}, + } + assert.Equal(t, want, got) +} + +// --------------------------------------------------------------------------- +// Redshift +// --------------------------------------------------------------------------- + +type fakeRedshift struct { + fn func(*redshift.DescribeReservedNodeOfferingsInput) (*redshift.DescribeReservedNodeOfferingsOutput, error) +} + +func (f *fakeRedshift) DescribeReservedNodeOfferings(ctx context.Context, in *redshift.DescribeReservedNodeOfferingsInput, _ ...func(*redshift.Options)) (*redshift.DescribeReservedNodeOfferingsOutput, error) { + return f.fn(in) +} + +func TestRedshiftProber_Probe(t *testing.T) { + fake := &fakeRedshift{ + fn: func(in *redshift.DescribeReservedNodeOfferingsInput) (*redshift.DescribeReservedNodeOfferingsOutput, error) { + return &redshift.DescribeReservedNodeOfferingsOutput{ + ReservedNodeOfferings: []redshifttypes.ReservedNodeOffering{ + { + NodeType: aws.String(probeTargetRedshift), + Duration: aws.Int32(31536000), + OfferingType: aws.String("All Upfront"), + }, + { + NodeType: aws.String(probeTargetRedshift), + Duration: aws.Int32(94608000), + OfferingType: aws.String("No Upfront"), + }, + // off-node-type — filtered + { + NodeType: aws.String("ra3.xlplus"), + Duration: aws.Int32(31536000), + OfferingType: aws.String("All Upfront"), + }, + }, + }, nil + }, + } + p := &RedshiftProber{NewClient: func(cfg aws.Config) RedshiftDescribeOfferings { return fake }} + assert.Equal(t, "redshift", p.Service()) + + got, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + got = sortCombos(got) + want := []Combo{ + {Provider: "aws", Service: "redshift", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "redshift", TermYears: 3, Payment: "no-upfront"}, + } + assert.Equal(t, want, got) +} + +// --------------------------------------------------------------------------- +// MemoryDB +// --------------------------------------------------------------------------- + +type fakeMemoryDB struct { + fn func(*memorydb.DescribeReservedNodesOfferingsInput) (*memorydb.DescribeReservedNodesOfferingsOutput, error) +} + +func (f *fakeMemoryDB) DescribeReservedNodesOfferings(ctx context.Context, in *memorydb.DescribeReservedNodesOfferingsInput, _ ...func(*memorydb.Options)) (*memorydb.DescribeReservedNodesOfferingsOutput, error) { + return f.fn(in) +} + +func TestMemoryDBProber_Probe(t *testing.T) { + fake := &fakeMemoryDB{ + fn: func(in *memorydb.DescribeReservedNodesOfferingsInput) (*memorydb.DescribeReservedNodesOfferingsOutput, error) { + assert.Equal(t, probeTargetMemoryDB, aws.ToString(in.NodeType)) + return &memorydb.DescribeReservedNodesOfferingsOutput{ + ReservedNodesOfferings: []memorydbtypes.ReservedNodesOffering{ + {Duration: 31536000, OfferingType: aws.String("All Upfront")}, + {Duration: 94608000, OfferingType: aws.String("Partial Upfront")}, + // anomaly: 18-month duration + {Duration: int32(18 * 30 * 86400), OfferingType: aws.String("All Upfront")}, + }, + }, nil + }, + } + p := &MemoryDBProber{NewClient: func(cfg aws.Config) MemoryDBDescribeOfferings { return fake }} + assert.Equal(t, "memorydb", p.Service()) + + got, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + got = sortCombos(got) + want := []Combo{ + {Provider: "aws", Service: "memorydb", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "memorydb", TermYears: 3, Payment: "partial-upfront"}, + } + assert.Equal(t, want, got) +} + +// --------------------------------------------------------------------------- +// EC2 +// --------------------------------------------------------------------------- + +type fakeEC2 struct { + fn func(*ec2.DescribeReservedInstancesOfferingsInput) (*ec2.DescribeReservedInstancesOfferingsOutput, error) +} + +func (f *fakeEC2) DescribeReservedInstancesOfferings(ctx context.Context, in *ec2.DescribeReservedInstancesOfferingsInput, _ ...func(*ec2.Options)) (*ec2.DescribeReservedInstancesOfferingsOutput, error) { + return f.fn(in) +} + +func TestEC2Prober_Probe(t *testing.T) { + fake := &fakeEC2{ + fn: func(in *ec2.DescribeReservedInstancesOfferingsInput) (*ec2.DescribeReservedInstancesOfferingsOutput, error) { + assert.Equal(t, ec2types.InstanceType(probeTargetEC2), in.InstanceType) + require.NotNil(t, in.IncludeMarketplace) + assert.False(t, aws.ToBool(in.IncludeMarketplace)) + return &ec2.DescribeReservedInstancesOfferingsOutput{ + ReservedInstancesOfferings: []ec2types.ReservedInstancesOffering{ + {Duration: aws.Int64(31536000), OfferingType: ec2types.OfferingTypeValuesAllUpfront}, + {Duration: aws.Int64(31536000), OfferingType: ec2types.OfferingTypeValuesPartialUpfront}, + {Duration: aws.Int64(31536000), OfferingType: ec2types.OfferingTypeValuesNoUpfront}, + {Duration: aws.Int64(94608000), OfferingType: ec2types.OfferingTypeValuesAllUpfront}, + // legacy pre-2011 utilization — must be dropped + {Duration: aws.Int64(31536000), OfferingType: ec2types.OfferingTypeValuesHeavyUtilization}, + {Duration: aws.Int64(31536000), OfferingType: ec2types.OfferingTypeValuesMediumUtilization}, + {Duration: aws.Int64(31536000), OfferingType: ec2types.OfferingTypeValuesLightUtilization}, + }, + }, nil + }, + } + p := &EC2Prober{NewClient: func(cfg aws.Config) EC2DescribeOfferings { return fake }} + assert.Equal(t, "ec2", p.Service()) + + got, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + got = sortCombos(got) + want := []Combo{ + {Provider: "aws", Service: "ec2", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "ec2", TermYears: 1, Payment: "no-upfront"}, + {Provider: "aws", Service: "ec2", TermYears: 1, Payment: "partial-upfront"}, + {Provider: "aws", Service: "ec2", TermYears: 3, Payment: "all-upfront"}, + } + assert.Equal(t, want, got) +} + +func TestDefaultProbers(t *testing.T) { + probers := DefaultProbers() + services := make([]string, 0, len(probers)) + for _, p := range probers { + services = append(services, p.Service()) + } + sort.Strings(services) + assert.Equal(t, []string{"ec2", "elasticache", "memorydb", "opensearch", "rds", "redshift"}, services) +} diff --git a/internal/commitmentopts/service.go b/internal/commitmentopts/service.go new file mode 100644 index 00000000..322165f7 --- /dev/null +++ b/internal/commitmentopts/service.go @@ -0,0 +1,189 @@ +package commitmentopts + +import ( + "context" + "fmt" + "sync" + + "github.com/LeanerCloud/CUDly/internal/config" + "github.com/aws/aws-sdk-go-v2/aws" + "golang.org/x/sync/errgroup" +) + +// BuildConfigFn resolves an aws.Config for a given CloudAccount. The real +// wiring uses credentials.ResolveAWSCredentialProvider; tests substitute a +// stub that returns a zero aws.Config. +type BuildConfigFn func(ctx context.Context, account *config.CloudAccount) (aws.Config, error) + +// Service orchestrates probe-and-cache of AWS commitment options. +// +// Concurrency model: +// - Get is DB-first. The DB lookup is lock-free. +// - On a cache miss, Get acquires mu, re-checks the DB under the lock +// (to absorb a concurrent writer), then probes. The lock is per- +// process; the singleton PK on commitment_options_probe_runs handles +// cross-process races. +// - Probes run concurrently under errgroup. Any probe error aborts the +// whole attempt and NOTHING is persisted — we never want a partial +// snapshot to set the "cache warm" sentinel. +type Service struct { + store Store + accounts AccountLister + buildConfig BuildConfigFn + probers []Prober + mu sync.Mutex +} + +// New returns a Service wired to the given dependencies. Pass +// DefaultProbers() for the real probe set; tests inject stubs. +func New(store Store, accounts AccountLister, buildConfig BuildConfigFn, probers []Prober) *Service { + return &Service{ + store: store, + accounts: accounts, + buildConfig: buildConfig, + probers: probers, + } +} + +// Get returns cached Options if they exist, otherwise probes. On any +// failure (no AWS account connected, probe error) it returns ErrNoData +// without persisting; callers map that to the unavailable status. +func (s *Service) Get(ctx context.Context) (Options, error) { + // Fast path: probe run already persisted. + if opts, ok, err := s.store.Get(ctx); err != nil { + return nil, fmt.Errorf("read commitmentopts store: %w", err) + } else if ok { + return opts, nil + } + + s.mu.Lock() + defer s.mu.Unlock() + + // Re-check under the lock — a peer goroutine may have just persisted. + if opts, ok, err := s.store.Get(ctx); err != nil { + return nil, fmt.Errorf("read commitmentopts store: %w", err) + } else if ok { + return opts, nil + } + + return s.probeAndPersist(ctx) +} + +// probeAndPersist runs every configured Prober against the first enabled +// AWS account. Returns ErrNoData (WITHOUT persisting) if any step fails. +func (s *Service) probeAndPersist(ctx context.Context) (Options, error) { + account, err := s.findAWSAccount(ctx) + if err != nil { + return nil, err + } + if account == nil { + return nil, ErrNoData + } + + cfg, err := s.buildConfig(ctx, account) + if err != nil { + return nil, ErrNoData + } + + // Fan out: one probe per service, first error wins. + results := make([][]Combo, len(s.probers)) + group, gctx := errgroup.WithContext(ctx) + for i, p := range s.probers { + i, p := i, p + group.Go(func() error { + combos, err := p.Probe(gctx, cfg) + if err != nil { + return err + } + results[i] = combos + return nil + }) + } + if err := group.Wait(); err != nil { + // All-or-nothing: a single service failing must NOT produce a + // half-populated cache that subsequent Get calls would trust. + return nil, ErrNoData + } + + var all []Combo + for _, r := range results { + all = append(all, r...) + } + + if err := s.store.Save(ctx, all, account.ExternalID); err != nil { + return nil, fmt.Errorf("persist commitment options: %w", err) + } + + return buildOptions(all), nil +} + +// findAWSAccount returns the first enabled AWS account, or nil if none +// exist. Nil + nil error is the "no AWS connected" signal. +func (s *Service) findAWSAccount(ctx context.Context) (*config.CloudAccount, error) { + provider := "aws" + accounts, err := s.accounts.ListCloudAccounts(ctx, config.CloudAccountFilter{Provider: &provider}) + if err != nil { + return nil, fmt.Errorf("list cloud accounts: %w", err) + } + for i := range accounts { + if accounts[i].Provider == "aws" { + return &accounts[i], nil + } + } + return nil, nil +} + +// Validate reports whether (provider, service, term, payment) is a legal +// combination according to the cached probe data. +// +// Fallback behaviour: if no probe data exists (the server has never +// successfully probed) Validate returns true so saves aren't blocked when +// we can't verify. The frontend's hardcoded rules are the user-facing +// gate; this check is belt-and-braces. +func (s *Service) Validate(ctx context.Context, provider, service string, term int, payment string) (bool, error) { + opts, err := s.Get(ctx) + if err != nil { + if err == ErrNoData { + return true, nil + } + return false, err + } + + byService, ok := opts[provider] + if !ok { + // No combos for this provider at all — nothing to validate + // against. Permissive fallback keeps parity with ErrNoData. + return true, nil + } + combos, ok := byService[service] + if !ok { + // We have data for this provider but not this service. The + // service is not commitment-capable in our probe set (e.g. + // Savings Plans) — permissive fallback. + return true, nil + } + for _, c := range combos { + if c.TermYears == term && c.Payment == payment { + return true, nil + } + } + return false, nil +} + +// buildOptions groups a flat Combo slice into the nested Options map the +// API handler returns. +func buildOptions(combos []Combo) Options { + if len(combos) == 0 { + return Options{} + } + out := make(Options) + for _, c := range combos { + byService := out[c.Provider] + if byService == nil { + byService = make(map[string][]Combo) + out[c.Provider] = byService + } + byService[c.Service] = append(byService[c.Service], c) + } + return out +} diff --git a/internal/commitmentopts/service_test.go b/internal/commitmentopts/service_test.go new file mode 100644 index 00000000..ab25aee0 --- /dev/null +++ b/internal/commitmentopts/service_test.go @@ -0,0 +1,286 @@ +package commitmentopts + +import ( + "context" + "errors" + "sync" + "sync/atomic" + "testing" + + "github.com/LeanerCloud/CUDly/internal/config" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// fakeStore is a memory-backed Store used throughout service_test.go. It +// tracks call counts so tests can assert the Save-once invariant. +type fakeStore struct { + mu sync.Mutex + opts Options + has bool + saves int32 + saveErr error + getErr error + hasErr error + savedID string + savedCnt int + saveHook func([]Combo, string) +} + +func (f *fakeStore) Get(ctx context.Context) (Options, bool, error) { + if f.getErr != nil { + return nil, false, f.getErr + } + f.mu.Lock() + defer f.mu.Unlock() + if !f.has { + return nil, false, nil + } + return f.opts, true, nil +} + +func (f *fakeStore) Save(ctx context.Context, combos []Combo, sourceAccountID string) error { + if f.saveErr != nil { + return f.saveErr + } + atomic.AddInt32(&f.saves, 1) + f.mu.Lock() + defer f.mu.Unlock() + f.opts = buildOptions(combos) + f.has = true + f.savedID = sourceAccountID + f.savedCnt = len(combos) + if f.saveHook != nil { + f.saveHook(combos, sourceAccountID) + } + return nil +} + +func (f *fakeStore) HasData(ctx context.Context) (bool, error) { + if f.hasErr != nil { + return false, f.hasErr + } + f.mu.Lock() + defer f.mu.Unlock() + return f.has, nil +} + +// fakeAccounts returns a fixed list. +type fakeAccounts struct { + accounts []config.CloudAccount + err error +} + +func (f *fakeAccounts) ListCloudAccounts(ctx context.Context, filter config.CloudAccountFilter) ([]config.CloudAccount, error) { + if f.err != nil { + return nil, f.err + } + return f.accounts, nil +} + +// stubProber returns a fixed set of combos (or an error). +type stubProber struct { + name string + combos []Combo + err error +} + +func (s *stubProber) Service() string { return s.name } +func (s *stubProber) Probe(ctx context.Context, _ aws.Config) ([]Combo, error) { + if s.err != nil { + return nil, s.err + } + return s.combos, nil +} + +func noopBuildConfig(ctx context.Context, _ *config.CloudAccount) (aws.Config, error) { + return aws.Config{}, nil +} + +func awsAccount(id string) config.CloudAccount { + return config.CloudAccount{ID: id, Provider: "aws", ExternalID: id, Enabled: true} +} + +// --------------------------------------------------------------------------- + +func TestService_Get_DBHitShortCircuits(t *testing.T) { + // Seed the store as if a prior probe already landed. The service must + // never invoke the probers. + cached := Options{"aws": {"rds": {{Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}}}} + store := &fakeStore{opts: cached, has: true} + + probed := false + probers := []Prober{&stubProber{name: "rds", combos: []Combo{{Provider: "aws", Service: "rds", TermYears: 3, Payment: "all-upfront"}}}} + // Wrap to detect any call. + wrapped := []Prober{proberFunc{name: "rds", fn: func() ([]Combo, error) { + probed = true + return probers[0].(*stubProber).combos, nil + }}} + + accounts := &fakeAccounts{accounts: []config.CloudAccount{awsAccount("acct")}} + svc := New(store, accounts, noopBuildConfig, wrapped) + + got, err := svc.Get(context.Background()) + require.NoError(t, err) + assert.Equal(t, cached, got) + assert.False(t, probed, "probers must not run when the DB is already warm") + assert.Zero(t, atomic.LoadInt32(&store.saves)) +} + +func TestService_Get_NoAWSAccountReturnsErrNoData(t *testing.T) { + store := &fakeStore{} + accounts := &fakeAccounts{accounts: nil} + svc := New(store, accounts, noopBuildConfig, []Prober{&stubProber{name: "rds"}}) + + _, err := svc.Get(context.Background()) + assert.ErrorIs(t, err, ErrNoData) + assert.Zero(t, atomic.LoadInt32(&store.saves)) +} + +func TestService_Get_AllProbesOKPersistsAndReturns(t *testing.T) { + store := &fakeStore{} + accounts := &fakeAccounts{accounts: []config.CloudAccount{awsAccount("123456789012")}} + probers := []Prober{ + &stubProber{name: "rds", combos: []Combo{{Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}}}, + &stubProber{name: "elasticache", combos: []Combo{{Provider: "aws", Service: "elasticache", TermYears: 3, Payment: "no-upfront"}}}, + } + svc := New(store, accounts, noopBuildConfig, probers) + + got, err := svc.Get(context.Background()) + require.NoError(t, err) + require.Contains(t, got, "aws") + assert.Len(t, got["aws"]["rds"], 1) + assert.Len(t, got["aws"]["elasticache"], 1) + assert.Equal(t, int32(1), atomic.LoadInt32(&store.saves)) + assert.Equal(t, "123456789012", store.savedID) + assert.Equal(t, 2, store.savedCnt) +} + +func TestService_Get_OneProberFailsDoesNotPersist(t *testing.T) { + store := &fakeStore{} + accounts := &fakeAccounts{accounts: []config.CloudAccount{awsAccount("a")}} + boom := errors.New("boom") + probers := []Prober{ + &stubProber{name: "rds", combos: []Combo{{Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}}}, + &stubProber{name: "elasticache", err: boom}, + } + svc := New(store, accounts, noopBuildConfig, probers) + + _, err := svc.Get(context.Background()) + assert.ErrorIs(t, err, ErrNoData) + assert.Zero(t, atomic.LoadInt32(&store.saves), "partial probe results must NOT be persisted") +} + +func TestService_Get_BuildConfigErrorReturnsErrNoData(t *testing.T) { + store := &fakeStore{} + accounts := &fakeAccounts{accounts: []config.CloudAccount{awsAccount("a")}} + boom := errors.New("creds") + svc := New(store, accounts, func(context.Context, *config.CloudAccount) (aws.Config, error) { + return aws.Config{}, boom + }, []Prober{&stubProber{name: "rds"}}) + + _, err := svc.Get(context.Background()) + assert.ErrorIs(t, err, ErrNoData) + assert.Zero(t, atomic.LoadInt32(&store.saves)) +} + +func TestService_Get_StoreReadErrorBubbles(t *testing.T) { + boom := errors.New("db down") + store := &fakeStore{getErr: boom} + accounts := &fakeAccounts{accounts: []config.CloudAccount{awsAccount("a")}} + svc := New(store, accounts, noopBuildConfig, []Prober{&stubProber{name: "rds"}}) + + _, err := svc.Get(context.Background()) + require.Error(t, err) + assert.ErrorIs(t, err, boom) +} + +func TestService_Validate_PermissiveOnErrNoData(t *testing.T) { + // No AWS account → Get returns ErrNoData → Validate must return true. + store := &fakeStore{} + accounts := &fakeAccounts{accounts: nil} + svc := New(store, accounts, noopBuildConfig, nil) + + ok, err := svc.Validate(context.Background(), "aws", "rds", 1, "all-upfront") + require.NoError(t, err) + assert.True(t, ok, "missing data must not block saves") +} + +func TestService_Validate_HitAndMiss(t *testing.T) { + cached := Options{"aws": {"rds": { + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 3, Payment: "no-upfront"}, + }}} + store := &fakeStore{opts: cached, has: true} + accounts := &fakeAccounts{} + svc := New(store, accounts, noopBuildConfig, nil) + + ok, err := svc.Validate(context.Background(), "aws", "rds", 1, "all-upfront") + require.NoError(t, err) + assert.True(t, ok) + + ok, err = svc.Validate(context.Background(), "aws", "rds", 3, "partial-upfront") + require.NoError(t, err) + assert.False(t, ok) +} + +func TestService_Validate_UnknownProviderPermissive(t *testing.T) { + cached := Options{"aws": {"rds": {{Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}}}} + store := &fakeStore{opts: cached, has: true} + svc := New(store, &fakeAccounts{}, noopBuildConfig, nil) + + // Azure isn't in our probe set — don't block. + ok, err := svc.Validate(context.Background(), "azure", "vm", 1, "all-upfront") + require.NoError(t, err) + assert.True(t, ok) +} + +func TestService_Validate_UnknownServiceUnderKnownProviderPermissive(t *testing.T) { + cached := Options{"aws": {"rds": {{Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}}}} + store := &fakeStore{opts: cached, has: true} + svc := New(store, &fakeAccounts{}, noopBuildConfig, nil) + + // savingsplans has no probe — don't block. + ok, err := svc.Validate(context.Background(), "aws", "savingsplans", 1, "all-upfront") + require.NoError(t, err) + assert.True(t, ok) +} + +func TestService_Get_ConcurrentCallersProbeOnce(t *testing.T) { + // Serialize probes via the mutex: N concurrent Get()s on a cold store + // must result in exactly one Save. + store := &fakeStore{} + accounts := &fakeAccounts{accounts: []config.CloudAccount{awsAccount("a")}} + + var probeCount int32 + probers := []Prober{proberFunc{name: "rds", fn: func() ([]Combo, error) { + atomic.AddInt32(&probeCount, 1) + return []Combo{{Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}}, nil + }}} + svc := New(store, accounts, noopBuildConfig, probers) + + var wg sync.WaitGroup + for i := 0; i < 8; i++ { + wg.Add(1) + go func() { + defer wg.Done() + _, _ = svc.Get(context.Background()) + }() + } + wg.Wait() + + assert.Equal(t, int32(1), atomic.LoadInt32(&store.saves), "Save must run exactly once") + assert.Equal(t, int32(1), atomic.LoadInt32(&probeCount), "Probe must run exactly once for 8 concurrent callers") +} + +// proberFunc is a tiny adapter letting tests wire a closure as a Prober. +type proberFunc struct { + name string + fn func() ([]Combo, error) +} + +func (p proberFunc) Service() string { return p.name } +func (p proberFunc) Probe(ctx context.Context, _ aws.Config) ([]Combo, error) { + return p.fn() +} diff --git a/internal/commitmentopts/store_postgres.go b/internal/commitmentopts/store_postgres.go new file mode 100644 index 00000000..9d166609 --- /dev/null +++ b/internal/commitmentopts/store_postgres.go @@ -0,0 +1,125 @@ +package commitmentopts + +import ( + "context" + "fmt" + + "github.com/LeanerCloud/CUDly/internal/database" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" +) + +// dbConn is the minimal pgx-style surface PostgresStore uses. Both +// *database.Connection and the pgxmock mocks satisfy it. Mirrors the +// interface shape in internal/config/store_postgres.go so the two stores +// can share test-container scaffolding. +type dbConn interface { + QueryRow(ctx context.Context, sql string, args ...any) pgx.Row + Query(ctx context.Context, sql string, args ...any) (pgx.Rows, error) + Exec(ctx context.Context, sql string, args ...any) (pgconn.CommandTag, error) + Begin(ctx context.Context) (pgx.Tx, error) +} + +// PostgresStore is the Postgres-backed commitmentopts.Store implementation. +type PostgresStore struct { + db dbConn +} + +// NewPostgresStore returns a store backed by the given connection. The +// connection must already have the 000039 migration applied. +func NewPostgresStore(db *database.Connection) *PostgresStore { + return &PostgresStore{db: db} +} + +// Verify PostgresStore implements Store. +var _ Store = (*PostgresStore)(nil) + +// Get returns every persisted combo grouped by provider and service, +// plus a boolean that is true iff a probe run row exists. The combos map +// may be empty even when the boolean is true — that legitimately means +// "we probed and nothing matched our normalizer". +func (s *PostgresStore) Get(ctx context.Context) (Options, bool, error) { + hasData, err := s.HasData(ctx) + if err != nil { + return nil, false, err + } + if !hasData { + return nil, false, nil + } + + rows, err := s.db.Query(ctx, + `SELECT provider, service, term_years, payment_option FROM commitment_options_combos`, + ) + if err != nil { + return nil, false, fmt.Errorf("query commitment_options_combos: %w", err) + } + defer rows.Close() + + opts := make(Options) + for rows.Next() { + var c Combo + if err := rows.Scan(&c.Provider, &c.Service, &c.TermYears, &c.Payment); err != nil { + return nil, false, fmt.Errorf("scan commitment_options_combos row: %w", err) + } + byService := opts[c.Provider] + if byService == nil { + byService = make(map[string][]Combo) + opts[c.Provider] = byService + } + byService[c.Service] = append(byService[c.Service], c) + } + if err := rows.Err(); err != nil { + return nil, false, fmt.Errorf("iterate commitment_options_combos: %w", err) + } + return opts, true, nil +} + +// HasData reports whether a probe run row exists. +func (s *PostgresStore) HasData(ctx context.Context) (bool, error) { + var exists bool + if err := s.db.QueryRow(ctx, + `SELECT EXISTS(SELECT 1 FROM commitment_options_probe_runs)`, + ).Scan(&exists); err != nil { + return false, fmt.Errorf("check commitment_options_probe_runs: %w", err) + } + return exists, nil +} + +// Save persists the probe run row and every combo transactionally. If a +// run row already exists (a second process raced us to persist), Save is +// a no-op — the singleton PK means only the first writer wins. Combo +// inserts use ON CONFLICT DO NOTHING so re-running the probe after a +// manual row clear and partial repopulation doesn't fail. +func (s *PostgresStore) Save(ctx context.Context, combos []Combo, sourceAccountID string) error { + tx, err := s.db.Begin(ctx) + if err != nil { + return fmt.Errorf("begin tx: %w", err) + } + // Rollback is a no-op after a successful Commit. + defer func() { _ = tx.Rollback(ctx) }() + + if _, err := tx.Exec(ctx, + `INSERT INTO commitment_options_probe_runs (singleton, probed_at, source_account_id) + VALUES (TRUE, NOW(), $1) + ON CONFLICT (singleton) DO NOTHING`, + sourceAccountID, + ); err != nil { + return fmt.Errorf("insert commitment_options_probe_runs: %w", err) + } + + for _, c := range combos { + if _, err := tx.Exec(ctx, + `INSERT INTO commitment_options_combos (provider, service, term_years, payment_option) + VALUES ($1, $2, $3, $4) + ON CONFLICT DO NOTHING`, + c.Provider, c.Service, c.TermYears, c.Payment, + ); err != nil { + return fmt.Errorf("insert commitment_options_combos: %w", err) + } + } + + if err := tx.Commit(ctx); err != nil { + return fmt.Errorf("commit tx: %w", err) + } + return nil +} diff --git a/internal/commitmentopts/store_postgres_test.go b/internal/commitmentopts/store_postgres_test.go new file mode 100644 index 00000000..7a6d1a7c --- /dev/null +++ b/internal/commitmentopts/store_postgres_test.go @@ -0,0 +1,134 @@ +//go:build integration +// +build integration + +package commitmentopts_test + +import ( + "context" + "path/filepath" + "runtime" + "sort" + "testing" + + "github.com/LeanerCloud/CUDly/internal/commitmentopts" + "github.com/LeanerCloud/CUDly/internal/database/postgres/migrations" + "github.com/LeanerCloud/CUDly/internal/database/postgres/testhelpers" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// getMigrationsPath mirrors the helper in internal/config/store_postgres_test.go — +// the migrations directory is a sibling of internal//. +func getMigrationsPath() string { + _, filename, _, _ := runtime.Caller(0) + return filepath.Join(filepath.Dir(filename), "..", "database", "postgres", "migrations") +} + +// setupStore stands up a fresh Postgres container and returns a store +// bound to it. The container is torn down via t.Cleanup. +func setupStore(ctx context.Context, t *testing.T) *commitmentopts.PostgresStore { + t.Helper() + + container, err := testhelpers.SetupPostgresContainer(ctx, t) + require.NoError(t, err) + t.Cleanup(func() { container.Cleanup(ctx) }) + + require.NoError(t, migrations.RunMigrations(ctx, container.DB.Pool(), getMigrationsPath(), "", "")) + return commitmentopts.NewPostgresStore(container.DB) +} + +func TestPostgresStore_HasData_EmptyIsFalse(t *testing.T) { + ctx := context.Background() + store := setupStore(ctx, t) + + has, err := store.HasData(ctx) + require.NoError(t, err) + assert.False(t, has) +} + +func TestPostgresStore_Get_EmptyReturnsFalseBool(t *testing.T) { + ctx := context.Background() + store := setupStore(ctx, t) + + opts, has, err := store.Get(ctx) + require.NoError(t, err) + assert.False(t, has) + assert.Empty(t, opts) +} + +func TestPostgresStore_SaveAndGet(t *testing.T) { + ctx := context.Background() + store := setupStore(ctx, t) + + combos := []commitmentopts.Combo{ + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "no-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 3, Payment: "all-upfront"}, + {Provider: "aws", Service: "elasticache", TermYears: 1, Payment: "partial-upfront"}, + } + require.NoError(t, store.Save(ctx, combos, "123456789012")) + + has, err := store.HasData(ctx) + require.NoError(t, err) + assert.True(t, has) + + opts, has, err := store.Get(ctx) + require.NoError(t, err) + assert.True(t, has) + require.Contains(t, opts, "aws") + require.Contains(t, opts["aws"], "rds") + require.Contains(t, opts["aws"], "elasticache") + + rds := opts["aws"]["rds"] + sort.Slice(rds, func(i, j int) bool { + if rds[i].TermYears != rds[j].TermYears { + return rds[i].TermYears < rds[j].TermYears + } + return rds[i].Payment < rds[j].Payment + }) + assert.Equal(t, []commitmentopts.Combo{ + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "no-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 3, Payment: "all-upfront"}, + }, rds) + + assert.Equal(t, []commitmentopts.Combo{ + {Provider: "aws", Service: "elasticache", TermYears: 1, Payment: "partial-upfront"}, + }, opts["aws"]["elasticache"]) +} + +func TestPostgresStore_Save_IdempotentOnRerun(t *testing.T) { + ctx := context.Background() + store := setupStore(ctx, t) + + combos := []commitmentopts.Combo{ + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}, + } + require.NoError(t, store.Save(ctx, combos, "111111111111")) + // Second call must not fail (singleton PK conflict on the run row, + // compound PK conflict on the combos row). + require.NoError(t, store.Save(ctx, combos, "222222222222")) + + opts, has, err := store.Get(ctx) + require.NoError(t, err) + assert.True(t, has) + assert.Equal(t, combos, opts["aws"]["rds"]) +} + +func TestPostgresStore_Save_EmptyCombosStillMarksRun(t *testing.T) { + ctx := context.Background() + store := setupStore(ctx, t) + + // A legitimate "AWS returned no matching offerings" scenario — we + // persist the run row so the next Get short-circuits via the bool. + require.NoError(t, store.Save(ctx, nil, "333333333333")) + + has, err := store.HasData(ctx) + require.NoError(t, err) + assert.True(t, has) + + opts, has, err := store.Get(ctx) + require.NoError(t, err) + assert.True(t, has) + assert.Empty(t, opts) +} diff --git a/internal/commitmentopts/types.go b/internal/commitmentopts/types.go new file mode 100644 index 00000000..abe06e87 --- /dev/null +++ b/internal/commitmentopts/types.go @@ -0,0 +1,86 @@ +// Package commitmentopts discovers which (term, payment) commitment tuples +// each AWS commitment-capable service actually sells, persists the result, +// and exposes it to the API layer so the frontend can hide impossible +// combinations instead of hardcoding them. +// +// The package never blocks a save on missing data: if no probe has been +// persisted yet (or the probe failed), Service.Validate falls back to +// permissive-true and the frontend's hardcoded rules still gate the UI. +package commitmentopts + +import ( + "context" + "errors" + + "github.com/LeanerCloud/CUDly/internal/config" + "github.com/aws/aws-sdk-go-v2/aws" +) + +// Combo is one (provider, service, term, payment) tuple harvested from the +// reserved-offerings APIs. TermYears is 1 or 3; Payment is one of +// "all-upfront", "partial-upfront", "no-upfront". Other durations and legacy +// utilization-style payment options are dropped during normalization. +type Combo struct { + Provider string + Service string + TermYears int + Payment string +} + +// Options groups valid combos by provider and service. Shape: +// +// Options["aws"]["rds"] = []Combo{...} +// +// Only AWS is ever populated today — Azure and GCP stay hardcoded in the +// frontend because their commitment APIs don't have an equivalent probe. +type Options map[string]map[string][]Combo + +// Prober probes a single AWS service's reserved-offerings API and returns +// every unique (term, payment) tuple it exposes. Implementations live in +// probe.go, one per service. +type Prober interface { + // Service returns the canonical service name used as the Options key + // and persisted in commitment_options_combos.service. + Service() string + + // Probe issues paginated Describe*Offerings calls against a canonical + // small instance type, normalizes the results, and returns unique + // Combos. Errors bubble up — the orchestrating Service treats ANY + // probe failure as "don't persist" (all-or-nothing). + Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) +} + +// Store persists probe results. Implementations must treat the combos table +// as idempotent (ON CONFLICT DO NOTHING) so concurrent writers don't trip +// each other. +type Store interface { + // Get returns the cached Options and a boolean indicating whether a + // probe run row exists. The boolean is the authoritative "cache warm" + // signal — an empty combos slice plus a present run row means "we + // probed, AWS returned nothing matching our normalizer", and callers + // should NOT re-probe. + Get(ctx context.Context) (Options, bool, error) + + // Save writes the probe run row and all combos atomically. If a run + // row already exists (concurrent writer won the race), Save is a + // no-op rather than an error. + Save(ctx context.Context, combos []Combo, sourceAccountID string) error + + // HasData reports whether a probe run row exists. Used as a cheap + // pre-check before acquiring the prober lock. + HasData(ctx context.Context) (bool, error) +} + +// AccountLister is the narrow subset of config.StoreInterface that Service +// needs. Defining it here (rather than depending on the full interface) +// keeps tests small and avoids a circular import when the API layer wires +// up the real store. +type AccountLister interface { + ListCloudAccounts(ctx context.Context, filter config.CloudAccountFilter) ([]config.CloudAccount, error) +} + +// ErrNoData signals "we don't have cached commitment options and can't +// produce them right now" (no AWS account connected, or a probe failed). +// Callers MUST NOT treat this as a hard error — the API handler maps it to +// `{"status":"unavailable"}` and Validate maps it to permissive-true. +var ErrNoData = errors.New("commitment options unavailable") diff --git a/internal/database/postgres/migrations/000039_commitment_options_cache.down.sql b/internal/database/postgres/migrations/000039_commitment_options_cache.down.sql new file mode 100644 index 00000000..fd7d6bb3 --- /dev/null +++ b/internal/database/postgres/migrations/000039_commitment_options_cache.down.sql @@ -0,0 +1,2 @@ +DROP TABLE IF EXISTS commitment_options_combos; +DROP TABLE IF EXISTS commitment_options_probe_runs; diff --git a/internal/database/postgres/migrations/000039_commitment_options_cache.up.sql b/internal/database/postgres/migrations/000039_commitment_options_cache.up.sql new file mode 100644 index 00000000..7d09fdfb --- /dev/null +++ b/internal/database/postgres/migrations/000039_commitment_options_cache.up.sql @@ -0,0 +1,23 @@ +-- commitment_options_probe_runs is a singleton table: one row per +-- successful probe of the AWS reserved-offerings APIs. Its presence is +-- the "is the cache warm?" sentinel. An empty table means the backend +-- has never persisted a probe (or the admin cleared it to force a +-- refresh). A non-empty table plus an empty commitment_options_combos +-- means "we probed and AWS genuinely has no matching offerings" — the +-- frontend will fall back to its hardcoded defaults in that case. +CREATE TABLE IF NOT EXISTS commitment_options_probe_runs ( + singleton BOOLEAN PRIMARY KEY DEFAULT TRUE CHECK (singleton), + probed_at TIMESTAMPTZ NOT NULL, + source_account_id TEXT NOT NULL +); + +-- commitment_options_combos records each supported (provider, service, +-- term, payment) tuple harvested from the reserved-offerings APIs. The +-- compound PK makes re-persists idempotent under ON CONFLICT DO NOTHING. +CREATE TABLE IF NOT EXISTS commitment_options_combos ( + provider TEXT NOT NULL, + service TEXT NOT NULL, + term_years INT NOT NULL, + payment_option TEXT NOT NULL, + PRIMARY KEY (provider, service, term_years, payment_option) +); diff --git a/internal/server/app.go b/internal/server/app.go index ec77d443..a8626861 100644 --- a/internal/server/app.go +++ b/internal/server/app.go @@ -15,6 +15,7 @@ import ( "github.com/LeanerCloud/CUDly/internal/analytics" "github.com/LeanerCloud/CUDly/internal/api" "github.com/LeanerCloud/CUDly/internal/auth" + "github.com/LeanerCloud/CUDly/internal/commitmentopts" "github.com/LeanerCloud/CUDly/internal/config" "github.com/LeanerCloud/CUDly/internal/credentials" "github.com/LeanerCloud/CUDly/internal/database" @@ -26,6 +27,7 @@ import ( "github.com/LeanerCloud/CUDly/internal/scheduler" "github.com/LeanerCloud/CUDly/internal/secrets" "github.com/LeanerCloud/CUDly/pkg/logging" + "github.com/aws/aws-sdk-go-v2/aws" awsconfig "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/service/sts" "github.com/jackc/pgx/v5/pgxpool" @@ -583,6 +585,29 @@ func (app *Application) reinitializeAfterConnect(dbConn *database.Connection) er IsLambda: app.appConfig.IsLambda, }) + // Build the commitment-options probe/cache service. It lazily probes + // AWS reserved-offerings APIs through the first enabled AWS account and + // persists the result so subsequent calls come from the DB. Failures + // anywhere along the chain (no account connected, probe denied) collapse + // to ErrNoData; the API handler and save-side validator both degrade + // gracefully, so this is wired unconditionally. + commitmentOpts := commitmentopts.New( + commitmentopts.NewPostgresStore(dbConn), + app.Config, + func(ctx context.Context, acct *config.CloudAccount) (aws.Config, error) { + stsClient := sts.NewFromConfig(awsCfg) + prov, err := credentials.ResolveAWSCredentialProvider(ctx, acct, credStore, stsClient) + if err != nil { + return aws.Config{}, err + } + return awsconfig.LoadDefaultConfig(ctx, + awsconfig.WithCredentialsProvider(prov), + awsconfig.WithRegion("us-east-1"), + ) + }, + commitmentopts.DefaultProbers(), + ) + // Update API handler with new config store, scheduler, and rate limiter. // AnalyticsClient is Postgres-backed (see api.NewPostgresAnalyticsClient) — // it aggregates purchase_history on demand so the History UI charts work @@ -603,6 +628,7 @@ func (app *Application) reinitializeAfterConnect(dbConn *database.Connection) er AnalyticsClient: api.NewPostgresAnalyticsClient(dbConn), OIDCSigner: app.signer, OIDCIssuerURL: resolveOIDCIssuerURL(app.appConfig), + CommitmentOpts: commitmentOpts, }) if app.API == nil { return fmt.Errorf("failed to create API handler") From 1dcbef7bbd751c4954c49b0c127ca3703b1607e7 Mon Sep 17 00:00:00 2001 From: Cristian Magherusan-Stanciu Date: Fri, 24 Apr 2026 23:44:59 +0200 Subject: [PATCH 2/2] fix(commitment-options): observable probe failures, unavailable-on-any-error, stable frontend overlay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to f315a0393. Two parallel review agents surfaced six concrete gaps against PR #54; this commit closes the material ones. - service.go: probe errors are no longer silently discarded into ErrNoData. Both the buildConfig failure and the errgroup.Wait() error are logged with account ID + the originating probe name so an operator can tell "no AWS account connected" from "IAM permission missing on opensearch:DescribeReservedInstanceOfferings". - handler_commitment_options.go: the endpoint now returns {status:"unavailable"} (HTTP 200) for ANY error from the service — not just ErrNoData. A transient DB blip or context cancellation no longer 500s the Settings overlay fetch. Non-ErrNoData errors are logged so telemetry is preserved. The existing "UnexpectedError_Propagates" test is rewritten to assert the new collapse-to-unavailable contract. - commitmentOptions.ts: the overlay now diffs against the canonical STANDARD_TERMS × AWS_PAYMENTS product instead of the (possibly already-narrowed) existing entry. Previously, if the server widened its supported set between two Settings visits, the second overlay computed the intersection against the stale narrow base and the UI stayed restrictive. New test re-widens after a narrow overlay to regress-guard. - openapi.yaml: /api/commitment-options is documented with its CommitmentOptionsResponse + CommitmentOptionCombo schemas, matching the convention used for every other route. - app.go: a short comment explains why us-east-1 is hardcoded (reserved offerings are global facts; widest coverage; GovCloud / China fall back to hardcoded rules via ErrNoData). - handler_commitment_options_test.go: new TestNewHandler_CommitmentOptsWired asserts that HandlerConfig.CommitmentOpts actually lands on the Handler — catches a future silent field-rename regression. Three out-of-scope items (router-level AuthUser enforcement, MemoryDB probe target verification, page-cap tests for the other 5 probers) tracked as separate follow-up GitHub issues. --- .../src/__tests__/commitmentOptions.test.ts | 20 +++++++ frontend/src/commitmentOptions.ts | 18 +++--- internal/api/handler_commitment_options.go | 12 +++- .../api/handler_commitment_options_test.go | 19 +++++- internal/api/openapi.yaml | 58 +++++++++++++++++++ internal/commitmentopts/service.go | 7 ++- internal/server/app.go | 7 +++ 7 files changed, 125 insertions(+), 16 deletions(-) diff --git a/frontend/src/__tests__/commitmentOptions.test.ts b/frontend/src/__tests__/commitmentOptions.test.ts index e5be4b35..1075e5cb 100644 --- a/frontend/src/__tests__/commitmentOptions.test.ts +++ b/frontend/src/__tests__/commitmentOptions.test.ts @@ -792,5 +792,25 @@ describe('commitmentOptions', () => { expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(true); expect(isValidCombination('aws', 'rds', 1, 'partial-upfront')).toBe(true); }); + + it('re-widens after a subsequent overlay reports broader support', async () => { + // Settings tab is re-entered after the server probe completes with + // a fuller combo set. The overlay must diff against the canonical + // STANDARD_TERMS × AWS_PAYMENTS product, not against the narrow + // result of the first call, or the UI stays stuck on the + // intersection and never widens. + const narrow = mockOk({ rds: [{ term: 1, payment: 'all-upfront' }] }); + await fetchAndPopulateCommitmentOptions(narrow); + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(false); + + const allCombos = [1, 3].flatMap(term => + ['all-upfront', 'partial-upfront', 'no-upfront'].map(payment => ({ term, payment })) + ); + const wide = mockOk({ rds: allCombos }); + await fetchAndPopulateCommitmentOptions(wide); + + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(true); + expect(isValidCombination('aws', 'rds', 1, 'partial-upfront')).toBe(true); + }); }); }); diff --git a/frontend/src/commitmentOptions.ts b/frontend/src/commitmentOptions.ts index 26e20c98..e44e033c 100644 --- a/frontend/src/commitmentOptions.ts +++ b/frontend/src/commitmentOptions.ts @@ -292,17 +292,17 @@ export async function fetchAndPopulateCommitmentOptions(fetchFn?: FetchLike): Pr const awsConfigs = commitmentConfigs.aws ?? (commitmentConfigs.aws = {}); for (const [service, supportedCombos] of Object.entries(body.aws)) { - const existing = awsConfigs[service]; - const base: CommitmentConfig = existing ?? { terms: STANDARD_TERMS, payments: AWS_PAYMENTS }; - - // Derive invalidCombinations as the set difference of (terms × payments) - // minus the supported tuples the server returned. + // Always diff against the canonical STANDARD_TERMS × AWS_PAYMENTS + // product, not against the existing (possibly-narrowed-from-a-prior- + // overlay) entry. Otherwise, when the server widens its supported set + // on a later fetch, we'd be computing the intersection against a + // stale narrow base and the UI would stay restrictive. const supportedKeys = new Set( supportedCombos.map(c => `${c.term}:${c.payment}`) ); const invalid: Array<{ term: number; payment: string }> = []; - for (const term of base.terms) { - for (const payment of base.payments) { + for (const term of STANDARD_TERMS) { + for (const payment of AWS_PAYMENTS) { if (!supportedKeys.has(`${term.value}:${payment.value}`)) { invalid.push({ term: term.value, payment: payment.value }); } @@ -310,8 +310,8 @@ export async function fetchAndPopulateCommitmentOptions(fetchFn?: FetchLike): Pr } awsConfigs[service] = { - terms: base.terms, - payments: base.payments, + terms: STANDARD_TERMS, + payments: AWS_PAYMENTS, invalidCombinations: invalid.length > 0 ? invalid : undefined, }; } diff --git a/internal/api/handler_commitment_options.go b/internal/api/handler_commitment_options.go index 59adf683..62833bf4 100644 --- a/internal/api/handler_commitment_options.go +++ b/internal/api/handler_commitment_options.go @@ -6,6 +6,7 @@ import ( "errors" "github.com/LeanerCloud/CUDly/internal/commitmentopts" + "github.com/LeanerCloud/CUDly/pkg/logging" ) // commitmentOptionsResponse is the JSON shape returned by @@ -37,10 +38,15 @@ func (h *Handler) getCommitmentOptions(ctx context.Context) (*commitmentOptionsR } opts, err := h.commitmentOpts.Get(ctx) if err != nil { - if errors.Is(err, commitmentopts.ErrNoData) { - return &commitmentOptionsResponse{Status: "unavailable"}, nil + // Any error collapses to "unavailable" so a transient DB blip + // or context cancellation doesn't break the Settings page + // overlay fetch. ErrNoData is the quiet path (no account + // connected / fresh install); anything else is logged so + // operators can still trace DB/connection issues. + if !errors.Is(err, commitmentopts.ErrNoData) { + logging.Warnf("commitmentopts handler: %v", err) } - return nil, err + return &commitmentOptionsResponse{Status: "unavailable"}, nil } awsOpts := opts["aws"] diff --git a/internal/api/handler_commitment_options_test.go b/internal/api/handler_commitment_options_test.go index 6e0051b8..d4d3a92f 100644 --- a/internal/api/handler_commitment_options_test.go +++ b/internal/api/handler_commitment_options_test.go @@ -49,7 +49,10 @@ func TestGetCommitmentOptions_ErrNoData_ReturnsUnavailable(t *testing.T) { assert.Nil(t, resp.AWS) } -func TestGetCommitmentOptions_UnexpectedError_Propagates(t *testing.T) { +func TestGetCommitmentOptions_UnexpectedError_CollapsesToUnavailable(t *testing.T) { + // Any non-ErrNoData failure (DB blip, ctx cancellation, etc.) must + // still return 200 + unavailable so the Settings page overlay doesn't + // break on transient server issues. The error is logged, not returned. boom := errors.New("database exploded") h := &Handler{commitmentOpts: &stubCommitmentOpts{ getFn: func(context.Context) (commitmentopts.Options, error) { @@ -59,8 +62,18 @@ func TestGetCommitmentOptions_UnexpectedError_Propagates(t *testing.T) { resp, err := h.getCommitmentOptions(context.Background()) - require.ErrorIs(t, err, boom) - assert.Nil(t, resp) + require.NoError(t, err) + assert.Equal(t, "unavailable", resp.Status) + assert.Nil(t, resp.AWS) +} + +func TestNewHandler_CommitmentOptsWired(t *testing.T) { + // Regression guard: silently dropping the HandlerConfig → Handler wire + // (via a field rename or accidental removal) would re-introduce the + // "endpoint always returns unavailable" bug we already fixed once. + stub := &stubCommitmentOpts{} + h := NewHandler(HandlerConfig{CommitmentOpts: stub}) + require.Same(t, stub, h.commitmentOpts) } func TestGetCommitmentOptions_EmptyAWS_CollapsesToUnavailable(t *testing.T) { diff --git a/internal/api/openapi.yaml b/internal/api/openapi.yaml index e6a9a8d6..fdfcd331 100644 --- a/internal/api/openapi.yaml +++ b/internal/api/openapi.yaml @@ -167,6 +167,29 @@ paths: '401': $ref: '#/components/responses/Unauthorized' + /api/commitment-options: + get: + operationId: getCommitmentOptions + tags: [Configuration] + summary: Dynamically-probed AWS commitment combinations + description: | + Returns the (term, payment) combinations AWS actually sells for + commitment-capable services, harvested from the reserved-offerings + APIs at most once per server lifetime and persisted to Postgres. + When probe data is unavailable (no AWS account connected, probe + failed, transient DB error) returns `{"status":"unavailable"}` at + HTTP 200 so the frontend can fall back to hardcoded rules without + tripping its error-toast path. + responses: + '200': + description: Commitment options or unavailable status + content: + application/json: + schema: + $ref: '#/components/schemas/CommitmentOptionsResponse' + '401': + $ref: '#/components/responses/Unauthorized' + # ---- Recommendations ---------------------------------------------------- /api/recommendations: get: @@ -1776,6 +1799,41 @@ components: items: type: string + CommitmentOptionsResponse: + type: object + required: [status] + properties: + status: + type: string + enum: [ok, unavailable] + description: | + `ok` means `aws` carries the probed combos. `unavailable` means + the probe hasn't run (no AWS account connected) or the most + recent attempt failed; the frontend should use hardcoded + defaults. + aws: + type: object + description: | + Per-service supported (term, payment) tuples. Keys are service + identifiers (rds, elasticache, opensearch, redshift, memorydb, + ec2). Omitted when status is `unavailable`. + additionalProperties: + type: array + items: + $ref: '#/components/schemas/CommitmentOptionCombo' + + CommitmentOptionCombo: + type: object + required: [term, payment] + properties: + term: + type: integer + enum: [1, 3] + description: Commitment length in years. + payment: + type: string + enum: [all-upfront, partial-upfront, no-upfront] + # -- Dashboard ---------------------------------------------------------- DashboardSummary: type: object diff --git a/internal/commitmentopts/service.go b/internal/commitmentopts/service.go index 322165f7..d85ce966 100644 --- a/internal/commitmentopts/service.go +++ b/internal/commitmentopts/service.go @@ -6,6 +6,7 @@ import ( "sync" "github.com/LeanerCloud/CUDly/internal/config" + "github.com/LeanerCloud/CUDly/pkg/logging" "github.com/aws/aws-sdk-go-v2/aws" "golang.org/x/sync/errgroup" ) @@ -82,6 +83,7 @@ func (s *Service) probeAndPersist(ctx context.Context) (Options, error) { cfg, err := s.buildConfig(ctx, account) if err != nil { + logging.Warnf("commitmentopts: probe aborted — build config account=%s: %v", account.ID, err) return nil, ErrNoData } @@ -93,7 +95,7 @@ func (s *Service) probeAndPersist(ctx context.Context) (Options, error) { group.Go(func() error { combos, err := p.Probe(gctx, cfg) if err != nil { - return err + return fmt.Errorf("probe %s: %w", p.Service(), err) } results[i] = combos return nil @@ -102,6 +104,9 @@ func (s *Service) probeAndPersist(ctx context.Context) (Options, error) { if err := group.Wait(); err != nil { // All-or-nothing: a single service failing must NOT produce a // half-populated cache that subsequent Get calls would trust. + // Log the first error so an operator can diagnose missing IAM + // permissions or SDK regressions without tailing per-service logs. + logging.Warnf("commitmentopts: probe aborted — account=%s: %v", account.ID, err) return nil, ErrNoData } diff --git a/internal/server/app.go b/internal/server/app.go index a8626861..570fb514 100644 --- a/internal/server/app.go +++ b/internal/server/app.go @@ -600,6 +600,13 @@ func (app *Application) reinitializeAfterConnect(dbConn *database.Connection) er if err != nil { return aws.Config{}, err } + // us-east-1 hardcoded because reserved offerings are global + // facts (not AZ-scoped), and us-east-1 has the widest + // instance-type coverage. This fails silently for GovCloud + // / China-partition accounts — those return ErrNoData from + // the probe and the frontend falls back to hardcoded rules, + // which is acceptable since those partitions rarely need + // dynamic commitment detection. return awsconfig.LoadDefaultConfig(ctx, awsconfig.WithCredentialsProvider(prov), awsconfig.WithRegion("us-east-1"),