diff --git a/frontend/src/__tests__/commitmentOptions.test.ts b/frontend/src/__tests__/commitmentOptions.test.ts index 0c97ad1b..1075e5cb 100644 --- a/frontend/src/__tests__/commitmentOptions.test.ts +++ b/frontend/src/__tests__/commitmentOptions.test.ts @@ -2,6 +2,7 @@ * Tests for commitmentOptions module */ import { + fetchAndPopulateCommitmentOptions, getCommitmentConfig, isValidCombination, getValidPaymentOptions, @@ -707,4 +708,109 @@ describe('commitmentOptions', () => { expect(configWithInvalid.invalidCombinations).toHaveLength(1); }); }); + + describe('fetchAndPopulateCommitmentOptions', () => { + // These tests mutate the shared commitmentConfigs module state via the + // function under test. Restore a known-good overlay at the end of each + // test by re-populating with the hardcoded fallback shape (no supported + // combos → full invalidCombinations → close enough to the starting + // state for subsequent tests to see consistent results). + const mockOk = (aws: Record>) => + jest.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ status: 'ok', aws }) + } as Response); + + afterEach(() => { + // Reset RDS back to hardcoded — other tests in this file assert RDS + // retains `{term:3, payment:'no-upfront'}` as invalid. + return fetchAndPopulateCommitmentOptions( + mockOk({ rds: [ + { term: 1, payment: 'all-upfront' }, + { term: 1, payment: 'partial-upfront' }, + { term: 1, payment: 'no-upfront' }, + { term: 3, payment: 'all-upfront' }, + { term: 3, payment: 'partial-upfront' }, + ]}) + ); + }); + + it('overlays server-supplied combos onto AWS services', async () => { + // Server says RDS supports ONLY 1yr all-upfront — everything else is + // invalid. Overlay should reflect that exactly. + const fetchMock = mockOk({ + rds: [{ term: 1, payment: 'all-upfront' }], + }); + + await fetchAndPopulateCommitmentOptions(fetchMock); + + expect(fetchMock).toHaveBeenCalledWith('/api/commitment-options'); + expect(isValidCombination('aws', 'rds', 1, 'all-upfront')).toBe(true); + expect(isValidCombination('aws', 'rds', 1, 'partial-upfront')).toBe(false); + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(false); + }); + + it('leaves hardcoded rules intact on status:unavailable', async () => { + const fetchMock = jest.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ status: 'unavailable' }) + } as Response); + + await fetchAndPopulateCommitmentOptions(fetchMock); + + // RDS hardcoded rule: 3yr no-upfront invalid, others valid. + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(false); + expect(isValidCombination('aws', 'rds', 1, 'all-upfront')).toBe(true); + }); + + it('leaves hardcoded rules intact on HTTP error', async () => { + const fetchMock = jest.fn().mockResolvedValue({ ok: false } as Response); + + await fetchAndPopulateCommitmentOptions(fetchMock); + + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(false); + expect(isValidCombination('aws', 'rds', 1, 'all-upfront')).toBe(true); + }); + + it('leaves hardcoded rules intact on network error', async () => { + const fetchMock = jest.fn().mockRejectedValue(new Error('network down')); + + await fetchAndPopulateCommitmentOptions(fetchMock); + + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(false); + }); + + it('clears invalidCombinations when server reports full support', async () => { + const allCombos = [1, 3].flatMap(term => + ['all-upfront', 'partial-upfront', 'no-upfront'].map(payment => ({ term, payment })) + ); + const fetchMock = mockOk({ rds: allCombos }); + + await fetchAndPopulateCommitmentOptions(fetchMock); + + // Every combo now valid — previously 3yr no-upfront was blocked. + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(true); + expect(isValidCombination('aws', 'rds', 1, 'partial-upfront')).toBe(true); + }); + + it('re-widens after a subsequent overlay reports broader support', async () => { + // Settings tab is re-entered after the server probe completes with + // a fuller combo set. The overlay must diff against the canonical + // STANDARD_TERMS × AWS_PAYMENTS product, not against the narrow + // result of the first call, or the UI stays stuck on the + // intersection and never widens. + const narrow = mockOk({ rds: [{ term: 1, payment: 'all-upfront' }] }); + await fetchAndPopulateCommitmentOptions(narrow); + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(false); + + const allCombos = [1, 3].flatMap(term => + ['all-upfront', 'partial-upfront', 'no-upfront'].map(payment => ({ term, payment })) + ); + const wide = mockOk({ rds: allCombos }); + await fetchAndPopulateCommitmentOptions(wide); + + expect(isValidCombination('aws', 'rds', 3, 'no-upfront')).toBe(true); + expect(isValidCombination('aws', 'rds', 1, 'partial-upfront')).toBe(true); + }); + }); }); diff --git a/frontend/src/commitmentOptions.ts b/frontend/src/commitmentOptions.ts index d78c500d..e44e033c 100644 --- a/frontend/src/commitmentOptions.ts +++ b/frontend/src/commitmentOptions.ts @@ -254,6 +254,69 @@ export function getPaymentLabel(value: string): string { return payment?.label ?? value; } +interface ServerCommitmentOptions { + status: 'ok' | 'unavailable'; + aws?: Record>; +} + +// FetchLike is a subset of fetch's signature that tests can stub without +// touching the global. Defaults to `fetch` when the caller passes nothing. +type FetchLike = (input: string, init?: RequestInit) => Promise; + +/** + * Fetch the dynamically-probed AWS commitment options from the backend and + * overlay them onto `commitmentConfigs.aws`. Server-side data always wins — + * we trust live AWS offerings over our hardcoded list. On any failure + * (network error, non-200, {status:"unavailable"}) the hardcoded rules stay + * intact so the UI still gates correctly. + * + * Idempotent: calling twice with the same data is a no-op. The Settings + * page awaits this before syncing per-service constraints so the first + * render reflects the server's rules, not the fallback's. + */ +export async function fetchAndPopulateCommitmentOptions(fetchFn?: FetchLike): Promise { + const f: FetchLike = fetchFn ?? ((input, init) => fetch(input, init)); + + let body: ServerCommitmentOptions; + try { + const resp = await f('/api/commitment-options'); + if (!resp.ok) return; + body = (await resp.json()) as ServerCommitmentOptions; + } catch { + // Network/parse errors: silently fall back to hardcoded rules. The + // frontend still works; we just miss the dynamic overlay. + return; + } + + if (body.status !== 'ok' || !body.aws) return; + + const awsConfigs = commitmentConfigs.aws ?? (commitmentConfigs.aws = {}); + for (const [service, supportedCombos] of Object.entries(body.aws)) { + // Always diff against the canonical STANDARD_TERMS × AWS_PAYMENTS + // product, not against the existing (possibly-narrowed-from-a-prior- + // overlay) entry. Otherwise, when the server widens its supported set + // on a later fetch, we'd be computing the intersection against a + // stale narrow base and the UI would stay restrictive. + const supportedKeys = new Set( + supportedCombos.map(c => `${c.term}:${c.payment}`) + ); + const invalid: Array<{ term: number; payment: string }> = []; + for (const term of STANDARD_TERMS) { + for (const payment of AWS_PAYMENTS) { + if (!supportedKeys.has(`${term.value}:${payment.value}`)) { + invalid.push({ term: term.value, payment: payment.value }); + } + } + } + + awsConfigs[service] = { + terms: STANDARD_TERMS, + payments: AWS_PAYMENTS, + invalidCombinations: invalid.length > 0 ? invalid : undefined, + }; + } +} + /** * Map legacy AWS payment values to display labels */ diff --git a/frontend/src/settings.ts b/frontend/src/settings.ts index c6273703..efcf7fd4 100644 --- a/frontend/src/settings.ts +++ b/frontend/src/settings.ts @@ -3,6 +3,7 @@ */ import * as api from './api'; +import { fetchAndPopulateCommitmentOptions } from './commitmentOptions'; import { initFederationPanel } from './federation'; import { confirmDialog } from './confirmDialog'; import { reflectDirtyState } from './settings-subnav'; @@ -1204,6 +1205,11 @@ export async function loadGlobalSettings(): Promise { if (formEl) formEl.classList.add('hidden'); if (errorEl) errorEl.classList.add('hidden'); + // Overlay dynamically-probed AWS commitment rules before we render the + // form, so the first paint already respects server data. Failures fall + // back to hardcoded rules silently — we never block Settings on this. + await fetchAndPopulateCommitmentOptions(); + try { const data = await api.getConfig(); diff --git a/go.mod b/go.mod index e246261d..8fd3bfa3 100644 --- a/go.mod +++ b/go.mod @@ -9,11 +9,11 @@ require ( github.com/aws/aws-sdk-go-v2/config v1.26.2 github.com/aws/aws-sdk-go-v2/service/costexplorer v1.61.0 // indirect github.com/aws/aws-sdk-go-v2/service/ec2 v1.251.2 - github.com/aws/aws-sdk-go-v2/service/elasticache v1.50.3 // indirect - github.com/aws/aws-sdk-go-v2/service/memorydb v1.31.4 // indirect - github.com/aws/aws-sdk-go-v2/service/opensearch v1.52.3 // indirect + github.com/aws/aws-sdk-go-v2/service/elasticache v1.50.3 + github.com/aws/aws-sdk-go-v2/service/memorydb v1.31.4 + github.com/aws/aws-sdk-go-v2/service/opensearch v1.52.3 github.com/aws/aws-sdk-go-v2/service/rds v1.97.3 - github.com/aws/aws-sdk-go-v2/service/redshift v1.58.3 // indirect + github.com/aws/aws-sdk-go-v2/service/redshift v1.58.3 github.com/spf13/cobra v1.8.0 github.com/stretchr/testify v1.11.1 ) diff --git a/internal/api/handler.go b/internal/api/handler.go index ee615d67..61f0c7d6 100644 --- a/internal/api/handler.go +++ b/internal/api/handler.go @@ -69,6 +69,12 @@ type Handler struct { // fields stay nil. reshapeEC2Factory func(aws.Config) reshapeEC2Client reshapeRecsFactory func(aws.Config) reshapeRecsClient + + // commitmentOpts discovers which AWS (term, payment) combinations + // each service actually sells and validates saves against that data. + // Nil is valid: the endpoint returns unavailable and save-side + // validation no-ops, deferring to the frontend's hardcoded rules. + commitmentOpts CommitmentOptsInterface } // getRIUtilizationCache returns the Postgres-backed TTL cache for Cost @@ -111,6 +117,7 @@ func NewHandler(cfg HandlerConfig) *Handler { analyticsCollector: cfg.AnalyticsCollector, signer: cfg.OIDCSigner, issuerURL: cfg.OIDCIssuerURL, + commitmentOpts: cfg.CommitmentOpts, } // Pre-load API key (with a 5s timeout to avoid stalling cold-start indefinitely) diff --git a/internal/api/handler_commitment_options.go b/internal/api/handler_commitment_options.go new file mode 100644 index 00000000..62833bf4 --- /dev/null +++ b/internal/api/handler_commitment_options.go @@ -0,0 +1,68 @@ +// Package api provides the HTTP API handlers for the CUDly dashboard. +package api + +import ( + "context" + "errors" + + "github.com/LeanerCloud/CUDly/internal/commitmentopts" + "github.com/LeanerCloud/CUDly/pkg/logging" +) + +// commitmentOptionsResponse is the JSON shape returned by +// GET /api/commitment-options. On success, AWS carries the probed combos +// keyed by service (rds, elasticache, ...). Azure and GCP are deliberately +// omitted — those commitment rules stay hardcoded in the frontend because +// their APIs don't expose a comparable probe. +type commitmentOptionsResponse struct { + Status string `json:"status"` + AWS map[string][]commitmentOptionCombo `json:"aws,omitempty"` +} + +// commitmentOptionCombo is one (term, payment) tuple as the frontend +// consumes it. Dropping Provider/Service from the persisted Combo shape +// keeps the wire payload compact. +type commitmentOptionCombo struct { + Term int `json:"term"` + Payment string `json:"payment"` +} + +// getCommitmentOptions returns the dynamically-probed AWS commitment +// options. On any error or when the probe hasn't run yet, it returns +// {"status":"unavailable"} (200, not a 5xx) so the frontend can fall +// back to its hardcoded defaults without tripping the generic +// error-toast path. +func (h *Handler) getCommitmentOptions(ctx context.Context) (*commitmentOptionsResponse, error) { + if h.commitmentOpts == nil { + return &commitmentOptionsResponse{Status: "unavailable"}, nil + } + opts, err := h.commitmentOpts.Get(ctx) + if err != nil { + // Any error collapses to "unavailable" so a transient DB blip + // or context cancellation doesn't break the Settings page + // overlay fetch. ErrNoData is the quiet path (no account + // connected / fresh install); anything else is logged so + // operators can still trace DB/connection issues. + if !errors.Is(err, commitmentopts.ErrNoData) { + logging.Warnf("commitmentopts handler: %v", err) + } + return &commitmentOptionsResponse{Status: "unavailable"}, nil + } + + awsOpts := opts["aws"] + out := make(map[string][]commitmentOptionCombo, len(awsOpts)) + for svc, combos := range awsOpts { + out[svc] = make([]commitmentOptionCombo, 0, len(combos)) + for _, c := range combos { + out[svc] = append(out[svc], commitmentOptionCombo{Term: c.TermYears, Payment: c.Payment}) + } + } + if len(out) == 0 { + // Probe ran but returned nothing for AWS (e.g. every probe got + // filtered out by normalization). Treat as unavailable so the + // frontend falls through to its hardcoded rules rather than + // rendering an empty constraint set. + return &commitmentOptionsResponse{Status: "unavailable"}, nil + } + return &commitmentOptionsResponse{Status: "ok", AWS: out}, nil +} diff --git a/internal/api/handler_commitment_options_test.go b/internal/api/handler_commitment_options_test.go new file mode 100644 index 00000000..d4d3a92f --- /dev/null +++ b/internal/api/handler_commitment_options_test.go @@ -0,0 +1,123 @@ +package api + +import ( + "context" + "errors" + "testing" + + "github.com/LeanerCloud/CUDly/internal/commitmentopts" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// stubCommitmentOpts lets handler tests drive the endpoint without the full +// probe+store machinery. Mirrors the CommitmentOptsInterface surface. +type stubCommitmentOpts struct { + getFn func(ctx context.Context) (commitmentopts.Options, error) + validateFn func(ctx context.Context, provider, service string, term int, payment string) (bool, error) +} + +func (s *stubCommitmentOpts) Get(ctx context.Context) (commitmentopts.Options, error) { + return s.getFn(ctx) +} + +func (s *stubCommitmentOpts) Validate(ctx context.Context, provider, service string, term int, payment string) (bool, error) { + return s.validateFn(ctx, provider, service, term, payment) +} + +func TestGetCommitmentOptions_NilService_ReturnsUnavailable(t *testing.T) { + h := &Handler{commitmentOpts: nil} + + resp, err := h.getCommitmentOptions(context.Background()) + + require.NoError(t, err) + assert.Equal(t, "unavailable", resp.Status) + assert.Nil(t, resp.AWS) +} + +func TestGetCommitmentOptions_ErrNoData_ReturnsUnavailable(t *testing.T) { + h := &Handler{commitmentOpts: &stubCommitmentOpts{ + getFn: func(context.Context) (commitmentopts.Options, error) { + return nil, commitmentopts.ErrNoData + }, + }} + + resp, err := h.getCommitmentOptions(context.Background()) + + require.NoError(t, err) + assert.Equal(t, "unavailable", resp.Status) + assert.Nil(t, resp.AWS) +} + +func TestGetCommitmentOptions_UnexpectedError_CollapsesToUnavailable(t *testing.T) { + // Any non-ErrNoData failure (DB blip, ctx cancellation, etc.) must + // still return 200 + unavailable so the Settings page overlay doesn't + // break on transient server issues. The error is logged, not returned. + boom := errors.New("database exploded") + h := &Handler{commitmentOpts: &stubCommitmentOpts{ + getFn: func(context.Context) (commitmentopts.Options, error) { + return nil, boom + }, + }} + + resp, err := h.getCommitmentOptions(context.Background()) + + require.NoError(t, err) + assert.Equal(t, "unavailable", resp.Status) + assert.Nil(t, resp.AWS) +} + +func TestNewHandler_CommitmentOptsWired(t *testing.T) { + // Regression guard: silently dropping the HandlerConfig → Handler wire + // (via a field rename or accidental removal) would re-introduce the + // "endpoint always returns unavailable" bug we already fixed once. + stub := &stubCommitmentOpts{} + h := NewHandler(HandlerConfig{CommitmentOpts: stub}) + require.Same(t, stub, h.commitmentOpts) +} + +func TestGetCommitmentOptions_EmptyAWS_CollapsesToUnavailable(t *testing.T) { + // Probe succeeded but returned nothing for AWS. Treating this as + // unavailable keeps the frontend on its hardcoded fallback rather than + // rendering an empty constraint set. + h := &Handler{commitmentOpts: &stubCommitmentOpts{ + getFn: func(context.Context) (commitmentopts.Options, error) { + return commitmentopts.Options{"aws": map[string][]commitmentopts.Combo{}}, nil + }, + }} + + resp, err := h.getCommitmentOptions(context.Background()) + + require.NoError(t, err) + assert.Equal(t, "unavailable", resp.Status) + assert.Nil(t, resp.AWS) +} + +func TestGetCommitmentOptions_Success_ReturnsAWSCombos(t *testing.T) { + h := &Handler{commitmentOpts: &stubCommitmentOpts{ + getFn: func(context.Context) (commitmentopts.Options, error) { + return commitmentopts.Options{ + "aws": map[string][]commitmentopts.Combo{ + "rds": { + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "partial-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 3, Payment: "all-upfront"}, + }, + "elasticache": { + {Provider: "aws", Service: "elasticache", TermYears: 1, Payment: "no-upfront"}, + {Provider: "aws", Service: "elasticache", TermYears: 3, Payment: "no-upfront"}, + }, + }, + }, nil + }, + }} + + resp, err := h.getCommitmentOptions(context.Background()) + + require.NoError(t, err) + assert.Equal(t, "ok", resp.Status) + assert.Len(t, resp.AWS["rds"], 3) + assert.Len(t, resp.AWS["elasticache"], 2) + // Spot-check the payload shape: term/payment only, no provider/service echo. + assert.Contains(t, resp.AWS["elasticache"], commitmentOptionCombo{Term: 3, Payment: "no-upfront"}) +} diff --git a/internal/api/handler_config.go b/internal/api/handler_config.go index ffe62de4..2a184e9a 100644 --- a/internal/api/handler_config.go +++ b/internal/api/handler_config.go @@ -82,6 +82,31 @@ func (h *Handler) updateConfig(ctx context.Context, req *events.LambdaFunctionUR return &StatusResponse{Status: "updated"}, nil } +// checkCommitmentOptionCombo rejects saves that carry a (term, payment) +// combination we've dynamically confirmed the cloud doesn't sell. Returns +// nil when: no probe service is wired, the service hasn't persisted data +// yet (absent data → fall through to the frontend's hardcoded rules), +// the save isn't AWS, or the combo is valid. Errors from Validate are +// logged and swallowed (permissive) so a transient DB blip never blocks +// a settings save. +func (h *Handler) checkCommitmentOptionCombo(ctx context.Context, cfg config.ServiceConfig) error { + if h.commitmentOpts == nil || cfg.Provider != "aws" || cfg.Term <= 0 || cfg.Payment == "" { + return nil + } + ok, err := h.commitmentOpts.Validate(ctx, cfg.Provider, cfg.Service, cfg.Term, cfg.Payment) + if err != nil { + logging.Warnf("commitment-option validation error (allowing save): %v", err) + return nil + } + if !ok { + return NewClientError(400, fmt.Sprintf( + "%s does not support %dyr %s commitments", + cfg.Service, cfg.Term, cfg.Payment, + )) + } + return nil +} + // mergeServiceConfig loads any existing service config and overlays the four // UI-editable fields (Enabled, Term, Payment, Coverage) from cfg onto it, so // that filter fields set outside the UI (RampSchedule, IncludeEngines, etc.) @@ -172,6 +197,10 @@ func (h *Handler) updateServiceConfig(ctx context.Context, req *events.LambdaFun return nil, NewClientError(400, fmt.Sprintf("validation error: %s", err)) } + if err := h.checkCommitmentOptionCombo(ctx, cfg); err != nil { + return nil, err + } + if err := h.config.SaveServiceConfig(ctx, &cfg); err != nil { return nil, err } diff --git a/internal/api/handler_config_test.go b/internal/api/handler_config_test.go index 8acf8522..feb8cf55 100644 --- a/internal/api/handler_config_test.go +++ b/internal/api/handler_config_test.go @@ -202,6 +202,86 @@ func TestHandler_updateServiceConfig_InvalidBody(t *testing.T) { assert.Contains(t, err.Error(), "invalid request body") } +func TestHandler_updateServiceConfig_CommitmentOptsReject(t *testing.T) { + ctx := context.Background() + mockStore := new(MockConfigStore) + mockAuth := new(MockAuthService) + + adminSession := &Session{ + UserID: "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", + Email: "admin@example.com", + Role: "admin", + } + mockAuth.On("ValidateSession", ctx, "admin-token").Return(adminSession, nil) + mockStore.On("GetServiceConfig", ctx, "aws", "rds").Return(nil, nil) + + // Probe data says RDS 3yr no-upfront doesn't exist. Save must 400. + // SaveServiceConfig is NOT set up — asserting it's never called. + handler := &Handler{ + config: mockStore, + auth: mockAuth, + commitmentOpts: &stubCommitmentOpts{ + validateFn: func(_ context.Context, provider, service string, term int, payment string) (bool, error) { + assert.Equal(t, "aws", provider) + assert.Equal(t, "rds", service) + assert.Equal(t, 3, term) + assert.Equal(t, "no-upfront", payment) + return false, nil + }, + }, + } + + body := `{"enabled": true, "term": 3, "payment": "no-upfront", "coverage": 80}` + req := &events.LambdaFunctionURLRequest{ + Headers: map[string]string{"Authorization": "Bearer admin-token"}, + Body: body, + } + result, err := handler.updateServiceConfig(ctx, req, "aws/rds") + + require.Error(t, err) + assert.Nil(t, result) + ce, ok := IsClientError(err) + require.True(t, ok) + assert.Equal(t, 400, ce.code) + assert.Contains(t, ce.message, "3yr no-upfront") + mockStore.AssertNotCalled(t, "SaveServiceConfig", mock.Anything, mock.Anything) +} + +func TestHandler_updateServiceConfig_CommitmentOptsAccept(t *testing.T) { + ctx := context.Background() + mockStore := new(MockConfigStore) + mockAuth := new(MockAuthService) + + adminSession := &Session{ + UserID: "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", + Email: "admin@example.com", + Role: "admin", + } + mockAuth.On("ValidateSession", ctx, "admin-token").Return(adminSession, nil) + mockStore.On("GetServiceConfig", ctx, "aws", "rds").Return(nil, nil) + mockStore.On("SaveServiceConfig", ctx, mock.AnythingOfType("*config.ServiceConfig")).Return(nil) + + handler := &Handler{ + config: mockStore, + auth: mockAuth, + commitmentOpts: &stubCommitmentOpts{ + validateFn: func(context.Context, string, string, int, string) (bool, error) { + return true, nil + }, + }, + } + + body := `{"enabled": true, "term": 1, "payment": "all-upfront", "coverage": 80}` + req := &events.LambdaFunctionURLRequest{ + Headers: map[string]string{"Authorization": "Bearer admin-token"}, + Body: body, + } + result, err := handler.updateServiceConfig(ctx, req, "aws/rds") + + require.NoError(t, err) + assert.Equal(t, "updated", result.Status) +} + func TestHandler_updateServiceConfig_NoSlash(t *testing.T) { ctx := context.Background() mockStore := new(MockConfigStore) diff --git a/internal/api/openapi.yaml b/internal/api/openapi.yaml index e6a9a8d6..fdfcd331 100644 --- a/internal/api/openapi.yaml +++ b/internal/api/openapi.yaml @@ -167,6 +167,29 @@ paths: '401': $ref: '#/components/responses/Unauthorized' + /api/commitment-options: + get: + operationId: getCommitmentOptions + tags: [Configuration] + summary: Dynamically-probed AWS commitment combinations + description: | + Returns the (term, payment) combinations AWS actually sells for + commitment-capable services, harvested from the reserved-offerings + APIs at most once per server lifetime and persisted to Postgres. + When probe data is unavailable (no AWS account connected, probe + failed, transient DB error) returns `{"status":"unavailable"}` at + HTTP 200 so the frontend can fall back to hardcoded rules without + tripping its error-toast path. + responses: + '200': + description: Commitment options or unavailable status + content: + application/json: + schema: + $ref: '#/components/schemas/CommitmentOptionsResponse' + '401': + $ref: '#/components/responses/Unauthorized' + # ---- Recommendations ---------------------------------------------------- /api/recommendations: get: @@ -1776,6 +1799,41 @@ components: items: type: string + CommitmentOptionsResponse: + type: object + required: [status] + properties: + status: + type: string + enum: [ok, unavailable] + description: | + `ok` means `aws` carries the probed combos. `unavailable` means + the probe hasn't run (no AWS account connected) or the most + recent attempt failed; the frontend should use hardcoded + defaults. + aws: + type: object + description: | + Per-service supported (term, payment) tuples. Keys are service + identifiers (rds, elasticache, opensearch, redshift, memorydb, + ec2). Omitted when status is `unavailable`. + additionalProperties: + type: array + items: + $ref: '#/components/schemas/CommitmentOptionCombo' + + CommitmentOptionCombo: + type: object + required: [term, payment] + properties: + term: + type: integer + enum: [1, 3] + description: Commitment length in years. + payment: + type: string + enum: [all-upfront, partial-upfront, no-upfront] + # -- Dashboard ---------------------------------------------------------- DashboardSummary: type: object diff --git a/internal/api/router.go b/internal/api/router.go index bc26e908..6a514531 100644 --- a/internal/api/router.go +++ b/internal/api/router.go @@ -70,6 +70,11 @@ func (r *Router) registerRoutes() { {PathPrefix: "/api/config/service/", Method: "GET", Handler: r.getServiceConfigHandler}, {PathPrefix: "/api/config/service/", Method: "PUT", Handler: r.updateServiceConfigHandler}, + // Dynamically-probed AWS commitment-option combos. Non-admin reads + // (AuthUser) — data is public-ish (hardcoded in the frontend today) + // but we don't expose it unauthenticated. + {ExactPath: "/api/commitment-options", Method: "GET", Handler: r.commitmentOptionsHandler, Auth: AuthUser}, + // Recommendations endpoints {ExactPath: "/api/recommendations", Method: "GET", Handler: r.getRecommendationsHandler}, {ExactPath: "/api/recommendations/freshness", Method: "GET", Handler: r.getRecommendationsFreshnessHandler}, @@ -293,6 +298,10 @@ func (r *Router) updateServiceConfigHandler(ctx context.Context, req *events.Lam return r.h.updateServiceConfig(ctx, req, params["id"]) } +func (r *Router) commitmentOptionsHandler(ctx context.Context, req *events.LambdaFunctionURLRequest, params map[string]string) (any, error) { + return r.h.getCommitmentOptions(ctx) +} + func (r *Router) getRecommendationsHandler(ctx context.Context, req *events.LambdaFunctionURLRequest, params map[string]string) (any, error) { return r.h.getRecommendations(ctx, req, req.QueryStringParameters) } diff --git a/internal/api/types.go b/internal/api/types.go index 0609780d..ea92a04c 100644 --- a/internal/api/types.go +++ b/internal/api/types.go @@ -6,6 +6,7 @@ import ( "sync" "time" + "github.com/LeanerCloud/CUDly/internal/commitmentopts" "github.com/LeanerCloud/CUDly/internal/config" "github.com/LeanerCloud/CUDly/internal/credentials" "github.com/LeanerCloud/CUDly/internal/email" @@ -68,6 +69,18 @@ type HandlerConfig struct { // publish in the Discovery document. Must match what Azure AD // federated credentials are registered with. OIDCIssuerURL string + // CommitmentOpts discovers which (term, payment) combinations each + // AWS service actually sells and validates saves against that data. + // Nil disables both the /api/commitment-options endpoint (returns + // unavailable) and save-side validation in updateServiceConfig. + CommitmentOpts CommitmentOptsInterface +} + +// CommitmentOptsInterface lets us swap the real *commitmentopts.Service for +// a stub in handler tests without pulling in the probe+store machinery. +type CommitmentOptsInterface interface { + Get(ctx context.Context) (commitmentopts.Options, error) + Validate(ctx context.Context, provider, service string, term int, payment string) (bool, error) } // AnalyticsClientInterface defines the interface for analytics queries diff --git a/internal/commitmentopts/normalize.go b/internal/commitmentopts/normalize.go new file mode 100644 index 00000000..05aa1907 --- /dev/null +++ b/internal/commitmentopts/normalize.go @@ -0,0 +1,78 @@ +package commitmentopts + +import "strings" + +// oneYearSeconds and threeYearSeconds are the only reservation durations +// AWS currently sells across the services we probe. Anything else (90-day +// heavy-utilization RIs, 5-year Redshift offerings on the rare occasion +// they exist) is dropped — the frontend only exposes 1yr/3yr in the UI so +// other durations have no place to surface. +const ( + oneYearSeconds int64 = 31536000 + threeYearSeconds int64 = 94608000 +) + +// durationToTerm maps a duration-in-seconds (as returned by the AWS +// Describe*Offerings APIs) to a term in whole years. Returns (0, false) +// for anything outside {1yr, 3yr}; callers drop those Combos. +func durationToTerm(seconds int64) (int, bool) { + switch seconds { + case oneYearSeconds: + return 1, true + case threeYearSeconds: + return 3, true + default: + return 0, false + } +} + +// normalizePayment canonicalizes the messy zoo of payment-option spellings +// AWS uses across services into one of our three tokens. Input may be: +// +// - "All Upfront" / "Partial Upfront" / "No Upfront" +// (RDS/ElastiCache/Redshift/MemoryDB OfferingType strings) +// - "ALL_UPFRONT" / "PARTIAL_UPFRONT" / "NO_UPFRONT" +// (OpenSearch's ReservedInstancePaymentOption enum stringer) +// - already-canonical "all-upfront" etc. +// +// It explicitly rejects legacy pre-2011 ElastiCache/EC2 utilization tokens +// ("Light Utilization" / "Medium Utilization" / "Heavy Utilization") — those +// predate the modern (term, payment) model and there is no sensible mapping +// into it. +func normalizePayment(raw string) (string, bool) { + token := stripNonAlnumLower(raw) + switch token { + case "allupfront": + return "all-upfront", true + case "partialupfront": + return "partial-upfront", true + case "noupfront": + return "no-upfront", true + case "lightutilization", "mediumutilization", "heavyutilization": + // Legacy utilization-based offerings — deliberately rejected. + return "", false + default: + return "", false + } +} + +// stripNonAlnumLower returns s lowercased with every non-alphanumeric +// character removed. It is the canonical form used to compare the many +// payment-option spellings AWS APIs emit. +func stripNonAlnumLower(s string) string { + var b strings.Builder + b.Grow(len(s)) + for _, r := range s { + switch { + case r >= 'A' && r <= 'Z': + b.WriteRune(r + ('a' - 'A')) + case r >= 'a' && r <= 'z': + b.WriteRune(r) + case r >= '0' && r <= '9': + b.WriteRune(r) + default: + // Drop spaces, hyphens, underscores, and everything else. + } + } + return b.String() +} diff --git a/internal/commitmentopts/normalize_test.go b/internal/commitmentopts/normalize_test.go new file mode 100644 index 00000000..7053775e --- /dev/null +++ b/internal/commitmentopts/normalize_test.go @@ -0,0 +1,94 @@ +package commitmentopts + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDurationToTerm(t *testing.T) { + cases := []struct { + name string + seconds int64 + term int + ok bool + }{ + {"1yr", 31536000, 1, true}, + {"3yr", 94608000, 3, true}, + {"zero", 0, 0, false}, + {"negative", -1, 0, false}, + {"5yr", 5 * 31536000, 0, false}, + {"90 days", 90 * 86400, 0, false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + term, ok := durationToTerm(tc.seconds) + assert.Equal(t, tc.term, term) + assert.Equal(t, tc.ok, ok) + }) + } +} + +func TestNormalizePayment(t *testing.T) { + cases := []struct { + name string + in string + out string + ok bool + }{ + // SDK-typed string variants ("All Upfront" / ...). + {"rds All Upfront", "All Upfront", "all-upfront", true}, + {"rds Partial Upfront", "Partial Upfront", "partial-upfront", true}, + {"rds No Upfront", "No Upfront", "no-upfront", true}, + + // OpenSearch enum stringer variant (ALL_UPFRONT). + {"enum ALL_UPFRONT", "ALL_UPFRONT", "all-upfront", true}, + {"enum PARTIAL_UPFRONT", "PARTIAL_UPFRONT", "partial-upfront", true}, + {"enum NO_UPFRONT", "NO_UPFRONT", "no-upfront", true}, + + // Already-canonical lowercase-kebab form. + {"canonical all-upfront", "all-upfront", "all-upfront", true}, + {"canonical partial-upfront", "partial-upfront", "partial-upfront", true}, + {"canonical no-upfront", "no-upfront", "no-upfront", true}, + + // Mixed-case / weird spacing — still normalizes. + {"mixed case", "aLl UpFrOnT", "all-upfront", true}, + {"leading/trailing whitespace", " All Upfront ", "all-upfront", true}, + {"underscore lowercase", "all_upfront", "all-upfront", true}, + + // Legacy utilization-based offerings must be rejected. + {"light utilization", "Light Utilization", "", false}, + {"medium utilization", "Medium Utilization", "", false}, + {"heavy utilization", "Heavy Utilization", "", false}, + {"light lowercase", "lightutilization", "", false}, + + // Unknown payment option — dropped. + {"unknown", "Something Else", "", false}, + {"empty", "", "", false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, ok := normalizePayment(tc.in) + assert.Equal(t, tc.out, got) + assert.Equal(t, tc.ok, ok) + }) + } +} + +func TestStripNonAlnumLower(t *testing.T) { + cases := []struct { + in string + out string + }{ + {"All Upfront", "allupfront"}, + {"ALL_UPFRONT", "allupfront"}, + {" all-upfront ", "allupfront"}, + {"a1b2c3", "a1b2c3"}, + {"", ""}, + } + for _, tc := range cases { + t.Run(tc.in, func(t *testing.T) { + assert.Equal(t, tc.out, stripNonAlnumLower(tc.in)) + }) + } +} diff --git a/internal/commitmentopts/probe.go b/internal/commitmentopts/probe.go new file mode 100644 index 00000000..b293a65f --- /dev/null +++ b/internal/commitmentopts/probe.go @@ -0,0 +1,426 @@ +package commitmentopts + +import ( + "context" + "fmt" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/aws/aws-sdk-go-v2/service/elasticache" + "github.com/aws/aws-sdk-go-v2/service/memorydb" + "github.com/aws/aws-sdk-go-v2/service/opensearch" + "github.com/aws/aws-sdk-go-v2/service/rds" + "github.com/aws/aws-sdk-go-v2/service/redshift" +) + +// maxPages is the hard ceiling on paginated Describe*Offerings calls per +// probe. We only need unique (duration, payment) tuples, which saturate +// well before 5 pages of 100 offerings each. The cap bounds worst-case API +// spend if an SDK change ever breaks pagination detection. +const maxPages = 5 + +// pageSize is the MaxRecords/MaxResults value we request. 100 is the +// documented maximum for every AWS Describe*Offerings API we call. +const pageSize int32 = 100 + +// Canonical probe targets. Picked per service so "offerings exist" is +// guaranteed in every commercial region — small/cheap instance types with +// long-standing public availability. The targets never round-trip through +// a purchase so their cost is irrelevant; what matters is that AWS +// actually has offerings to return for them. +const ( + probeTargetRDS = "db.t3.micro" + probeTargetElastiCache = "cache.t3.micro" + probeTargetOpenSearch = "t3.small.search" + probeTargetRedshift = "dc2.large" + probeTargetMemoryDB = "db.t4g.small" + probeTargetEC2 = "t3.micro" +) + +// collect dedupes a probe's raw (durationSeconds, rawPayment) pairs, runs +// both normalizers, and builds the Combo slice. Duplicates — a single +// (term, payment) tuple appears once per instance size × AZ × engine +// variant — are collapsed so the caller sees at most six Combos per +// service (2 terms × 3 payments). +func collect(service string, raw []rawOffer) []Combo { + type key struct { + term int + payment string + } + seen := make(map[key]struct{}, len(raw)) + out := make([]Combo, 0, len(raw)) + for _, r := range raw { + term, ok := durationToTerm(r.durationSeconds) + if !ok { + continue + } + payment, ok := normalizePayment(r.payment) + if !ok { + continue + } + k := key{term: term, payment: payment} + if _, dup := seen[k]; dup { + continue + } + seen[k] = struct{}{} + out = append(out, Combo{ + Provider: "aws", + Service: service, + TermYears: term, + Payment: payment, + }) + } + return out +} + +// rawOffer is the pre-normalization shape every per-service probe feeds +// into collect(). Keeping the shape uniform means normalization lives in +// exactly one place. +type rawOffer struct { + durationSeconds int64 + payment string +} + +// --------------------------------------------------------------------------- +// RDS +// --------------------------------------------------------------------------- + +// RDSDescribeOfferings is the minimal RDS surface the probe needs. It +// matches the method signature on the generated client so tests can +// substitute a mock without dragging in the full RDSAPI interface. +type RDSDescribeOfferings interface { + DescribeReservedDBInstancesOfferings(ctx context.Context, params *rds.DescribeReservedDBInstancesOfferingsInput, optFns ...func(*rds.Options)) (*rds.DescribeReservedDBInstancesOfferingsOutput, error) +} + +// RDSProber probes rds:DescribeReservedDBInstancesOfferings. +type RDSProber struct { + // NewClient builds a client from the probe's aws.Config. Override in + // tests to return a mock. + NewClient func(cfg aws.Config) RDSDescribeOfferings +} + +// Service returns "rds". +func (p *RDSProber) Service() string { return "rds" } + +// Probe returns the normalized (term, payment) combos RDS currently sells +// against db.t3.micro. +func (p *RDSProber) Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) { + client := p.client(cfg) + var raw []rawOffer + var marker *string + for page := 0; page < maxPages; page++ { + out, err := client.DescribeReservedDBInstancesOfferings(ctx, &rds.DescribeReservedDBInstancesOfferingsInput{ + DBInstanceClass: aws.String(probeTargetRDS), + MaxRecords: aws.Int32(pageSize), + Marker: marker, + }) + if err != nil { + return nil, fmt.Errorf("rds: %w", err) + } + for _, o := range out.ReservedDBInstancesOfferings { + raw = append(raw, rawOffer{ + durationSeconds: int64(aws.ToInt32(o.Duration)), + payment: aws.ToString(o.OfferingType), + }) + } + if out.Marker == nil || aws.ToString(out.Marker) == "" { + break + } + marker = out.Marker + } + return collect(p.Service(), raw), nil +} + +func (p *RDSProber) client(cfg aws.Config) RDSDescribeOfferings { + if p.NewClient != nil { + return p.NewClient(cfg) + } + return rds.NewFromConfig(cfg) +} + +// --------------------------------------------------------------------------- +// ElastiCache +// --------------------------------------------------------------------------- + +// ElastiCacheDescribeOfferings is the minimal ElastiCache surface we use. +type ElastiCacheDescribeOfferings interface { + DescribeReservedCacheNodesOfferings(ctx context.Context, params *elasticache.DescribeReservedCacheNodesOfferingsInput, optFns ...func(*elasticache.Options)) (*elasticache.DescribeReservedCacheNodesOfferingsOutput, error) +} + +// ElastiCacheProber probes elasticache:DescribeReservedCacheNodesOfferings. +type ElastiCacheProber struct { + NewClient func(cfg aws.Config) ElastiCacheDescribeOfferings +} + +// Service returns "elasticache". +func (p *ElastiCacheProber) Service() string { return "elasticache" } + +// Probe returns the combos for cache.t3.micro. +func (p *ElastiCacheProber) Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) { + client := p.client(cfg) + var raw []rawOffer + var marker *string + for page := 0; page < maxPages; page++ { + out, err := client.DescribeReservedCacheNodesOfferings(ctx, &elasticache.DescribeReservedCacheNodesOfferingsInput{ + CacheNodeType: aws.String(probeTargetElastiCache), + MaxRecords: aws.Int32(pageSize), + Marker: marker, + }) + if err != nil { + return nil, fmt.Errorf("elasticache: %w", err) + } + for _, o := range out.ReservedCacheNodesOfferings { + raw = append(raw, rawOffer{ + durationSeconds: int64(aws.ToInt32(o.Duration)), + payment: aws.ToString(o.OfferingType), + }) + } + if out.Marker == nil || aws.ToString(out.Marker) == "" { + break + } + marker = out.Marker + } + return collect(p.Service(), raw), nil +} + +func (p *ElastiCacheProber) client(cfg aws.Config) ElastiCacheDescribeOfferings { + if p.NewClient != nil { + return p.NewClient(cfg) + } + return elasticache.NewFromConfig(cfg) +} + +// --------------------------------------------------------------------------- +// OpenSearch +// --------------------------------------------------------------------------- + +// OpenSearchDescribeOfferings is the minimal OpenSearch surface we use. +// The OpenSearch API has no per-instance-type filter on this endpoint, so +// the probe filters client-side after fetching. +type OpenSearchDescribeOfferings interface { + DescribeReservedInstanceOfferings(ctx context.Context, params *opensearch.DescribeReservedInstanceOfferingsInput, optFns ...func(*opensearch.Options)) (*opensearch.DescribeReservedInstanceOfferingsOutput, error) +} + +// OpenSearchProber probes opensearch:DescribeReservedInstanceOfferings. +type OpenSearchProber struct { + NewClient func(cfg aws.Config) OpenSearchDescribeOfferings +} + +// Service returns "opensearch". +func (p *OpenSearchProber) Service() string { return "opensearch" } + +// Probe returns the combos for t3.small.search. +func (p *OpenSearchProber) Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) { + client := p.client(cfg) + var raw []rawOffer + var nextToken *string + for page := 0; page < maxPages; page++ { + out, err := client.DescribeReservedInstanceOfferings(ctx, &opensearch.DescribeReservedInstanceOfferingsInput{ + MaxResults: pageSize, + NextToken: nextToken, + }) + if err != nil { + return nil, fmt.Errorf("opensearch: %w", err) + } + for _, o := range out.ReservedInstanceOfferings { + if string(o.InstanceType) != probeTargetOpenSearch { + continue + } + raw = append(raw, rawOffer{ + durationSeconds: int64(o.Duration), + payment: string(o.PaymentOption), + }) + } + if out.NextToken == nil || aws.ToString(out.NextToken) == "" { + break + } + nextToken = out.NextToken + } + return collect(p.Service(), raw), nil +} + +func (p *OpenSearchProber) client(cfg aws.Config) OpenSearchDescribeOfferings { + if p.NewClient != nil { + return p.NewClient(cfg) + } + return opensearch.NewFromConfig(cfg) +} + +// --------------------------------------------------------------------------- +// Redshift +// --------------------------------------------------------------------------- + +// RedshiftDescribeOfferings is the minimal Redshift surface we use. The +// API has no NodeType filter on DescribeReservedNodeOfferings so the probe +// filters client-side. +type RedshiftDescribeOfferings interface { + DescribeReservedNodeOfferings(ctx context.Context, params *redshift.DescribeReservedNodeOfferingsInput, optFns ...func(*redshift.Options)) (*redshift.DescribeReservedNodeOfferingsOutput, error) +} + +// RedshiftProber probes redshift:DescribeReservedNodeOfferings. +type RedshiftProber struct { + NewClient func(cfg aws.Config) RedshiftDescribeOfferings +} + +// Service returns "redshift". +func (p *RedshiftProber) Service() string { return "redshift" } + +// Probe returns the combos for dc2.large. +func (p *RedshiftProber) Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) { + client := p.client(cfg) + var raw []rawOffer + var marker *string + for page := 0; page < maxPages; page++ { + out, err := client.DescribeReservedNodeOfferings(ctx, &redshift.DescribeReservedNodeOfferingsInput{ + MaxRecords: aws.Int32(pageSize), + Marker: marker, + }) + if err != nil { + return nil, fmt.Errorf("redshift: %w", err) + } + for _, o := range out.ReservedNodeOfferings { + if aws.ToString(o.NodeType) != probeTargetRedshift { + continue + } + raw = append(raw, rawOffer{ + durationSeconds: int64(aws.ToInt32(o.Duration)), + payment: aws.ToString(o.OfferingType), + }) + } + if out.Marker == nil || aws.ToString(out.Marker) == "" { + break + } + marker = out.Marker + } + return collect(p.Service(), raw), nil +} + +func (p *RedshiftProber) client(cfg aws.Config) RedshiftDescribeOfferings { + if p.NewClient != nil { + return p.NewClient(cfg) + } + return redshift.NewFromConfig(cfg) +} + +// --------------------------------------------------------------------------- +// MemoryDB +// --------------------------------------------------------------------------- + +// MemoryDBDescribeOfferings is the minimal MemoryDB surface we use. +type MemoryDBDescribeOfferings interface { + DescribeReservedNodesOfferings(ctx context.Context, params *memorydb.DescribeReservedNodesOfferingsInput, optFns ...func(*memorydb.Options)) (*memorydb.DescribeReservedNodesOfferingsOutput, error) +} + +// MemoryDBProber probes memorydb:DescribeReservedNodesOfferings. +type MemoryDBProber struct { + NewClient func(cfg aws.Config) MemoryDBDescribeOfferings +} + +// Service returns "memorydb". +func (p *MemoryDBProber) Service() string { return "memorydb" } + +// Probe returns the combos for db.t4g.small. +func (p *MemoryDBProber) Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) { + client := p.client(cfg) + var raw []rawOffer + var nextToken *string + for page := 0; page < maxPages; page++ { + out, err := client.DescribeReservedNodesOfferings(ctx, &memorydb.DescribeReservedNodesOfferingsInput{ + NodeType: aws.String(probeTargetMemoryDB), + MaxResults: aws.Int32(pageSize), + NextToken: nextToken, + }) + if err != nil { + return nil, fmt.Errorf("memorydb: %w", err) + } + for _, o := range out.ReservedNodesOfferings { + raw = append(raw, rawOffer{ + durationSeconds: int64(o.Duration), + payment: aws.ToString(o.OfferingType), + }) + } + if out.NextToken == nil || aws.ToString(out.NextToken) == "" { + break + } + nextToken = out.NextToken + } + return collect(p.Service(), raw), nil +} + +func (p *MemoryDBProber) client(cfg aws.Config) MemoryDBDescribeOfferings { + if p.NewClient != nil { + return p.NewClient(cfg) + } + return memorydb.NewFromConfig(cfg) +} + +// --------------------------------------------------------------------------- +// EC2 +// --------------------------------------------------------------------------- + +// EC2DescribeOfferings is the minimal EC2 surface we use. +type EC2DescribeOfferings interface { + DescribeReservedInstancesOfferings(ctx context.Context, params *ec2.DescribeReservedInstancesOfferingsInput, optFns ...func(*ec2.Options)) (*ec2.DescribeReservedInstancesOfferingsOutput, error) +} + +// EC2Prober probes ec2:DescribeReservedInstancesOfferings. +type EC2Prober struct { + NewClient func(cfg aws.Config) EC2DescribeOfferings +} + +// Service returns "ec2". +func (p *EC2Prober) Service() string { return "ec2" } + +// Probe returns the combos for t3.micro. IncludeMarketplace is explicitly +// false so we only see AWS-native (standard/convertible) offerings — the +// Marketplace resale market has arbitrary durations that would pollute +// normalization. +func (p *EC2Prober) Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) { + client := p.client(cfg) + var raw []rawOffer + var nextToken *string + for page := 0; page < maxPages; page++ { + out, err := client.DescribeReservedInstancesOfferings(ctx, &ec2.DescribeReservedInstancesOfferingsInput{ + InstanceType: ec2types.InstanceType(probeTargetEC2), + IncludeMarketplace: aws.Bool(false), + MaxResults: aws.Int32(pageSize), + NextToken: nextToken, + }) + if err != nil { + return nil, fmt.Errorf("ec2: %w", err) + } + for _, o := range out.ReservedInstancesOfferings { + raw = append(raw, rawOffer{ + durationSeconds: aws.ToInt64(o.Duration), + payment: string(o.OfferingType), + }) + } + if out.NextToken == nil || aws.ToString(out.NextToken) == "" { + break + } + nextToken = out.NextToken + } + return collect(p.Service(), raw), nil +} + +func (p *EC2Prober) client(cfg aws.Config) EC2DescribeOfferings { + if p.NewClient != nil { + return p.NewClient(cfg) + } + return ec2.NewFromConfig(cfg) +} + +// DefaultProbers returns one prober instance per commitment-capable +// service. The Service wires these up by default; tests override via a +// custom slice. +func DefaultProbers() []Prober { + return []Prober{ + &RDSProber{}, + &ElastiCacheProber{}, + &OpenSearchProber{}, + &RedshiftProber{}, + &MemoryDBProber{}, + &EC2Prober{}, + } +} diff --git a/internal/commitmentopts/probe_test.go b/internal/commitmentopts/probe_test.go new file mode 100644 index 00000000..fd66dba7 --- /dev/null +++ b/internal/commitmentopts/probe_test.go @@ -0,0 +1,346 @@ +package commitmentopts + +import ( + "context" + "errors" + "sort" + "testing" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/aws/aws-sdk-go-v2/service/elasticache" + elasticachetypes "github.com/aws/aws-sdk-go-v2/service/elasticache/types" + "github.com/aws/aws-sdk-go-v2/service/memorydb" + memorydbtypes "github.com/aws/aws-sdk-go-v2/service/memorydb/types" + "github.com/aws/aws-sdk-go-v2/service/opensearch" + opensearchtypes "github.com/aws/aws-sdk-go-v2/service/opensearch/types" + "github.com/aws/aws-sdk-go-v2/service/rds" + rdstypes "github.com/aws/aws-sdk-go-v2/service/rds/types" + "github.com/aws/aws-sdk-go-v2/service/redshift" + redshifttypes "github.com/aws/aws-sdk-go-v2/service/redshift/types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// sortCombos gives tests a stable comparison order — probers dedupe by +// map keys so iteration order is otherwise nondeterministic. +func sortCombos(c []Combo) []Combo { + sort.Slice(c, func(i, j int) bool { + if c[i].TermYears != c[j].TermYears { + return c[i].TermYears < c[j].TermYears + } + return c[i].Payment < c[j].Payment + }) + return c +} + +// --------------------------------------------------------------------------- +// RDS +// --------------------------------------------------------------------------- + +type fakeRDS struct { + fn func(*rds.DescribeReservedDBInstancesOfferingsInput) (*rds.DescribeReservedDBInstancesOfferingsOutput, error) +} + +func (f *fakeRDS) DescribeReservedDBInstancesOfferings(ctx context.Context, in *rds.DescribeReservedDBInstancesOfferingsInput, _ ...func(*rds.Options)) (*rds.DescribeReservedDBInstancesOfferingsOutput, error) { + return f.fn(in) +} + +func TestRDSProber_Probe(t *testing.T) { + fake := &fakeRDS{ + fn: func(in *rds.DescribeReservedDBInstancesOfferingsInput) (*rds.DescribeReservedDBInstancesOfferingsOutput, error) { + assert.Equal(t, probeTargetRDS, aws.ToString(in.DBInstanceClass)) + return &rds.DescribeReservedDBInstancesOfferingsOutput{ + ReservedDBInstancesOfferings: []rdstypes.ReservedDBInstancesOffering{ + {Duration: aws.Int32(31536000), OfferingType: aws.String("All Upfront")}, + {Duration: aws.Int32(31536000), OfferingType: aws.String("Partial Upfront")}, + {Duration: aws.Int32(31536000), OfferingType: aws.String("No Upfront")}, + {Duration: aws.Int32(94608000), OfferingType: aws.String("All Upfront")}, + // dup of a 1yr All Upfront — must collapse + {Duration: aws.Int32(31536000), OfferingType: aws.String("All Upfront")}, + // anomalies + {Duration: aws.Int32(94608000), OfferingType: aws.String("Light Utilization")}, + {Duration: aws.Int32(5 * 31536000), OfferingType: aws.String("All Upfront")}, + {Duration: aws.Int32(31536000), OfferingType: aws.String("Mystery Option")}, + }, + }, nil + }, + } + p := &RDSProber{NewClient: func(cfg aws.Config) RDSDescribeOfferings { return fake }} + + assert.Equal(t, "rds", p.Service()) + + got, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + got = sortCombos(got) + want := []Combo{ + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "no-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "partial-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 3, Payment: "all-upfront"}, + } + assert.Equal(t, want, got) +} + +func TestRDSProber_ErrorPropagates(t *testing.T) { + boom := errors.New("boom") + fake := &fakeRDS{fn: func(*rds.DescribeReservedDBInstancesOfferingsInput) (*rds.DescribeReservedDBInstancesOfferingsOutput, error) { + return nil, boom + }} + p := &RDSProber{NewClient: func(cfg aws.Config) RDSDescribeOfferings { return fake }} + _, err := p.Probe(context.Background(), aws.Config{}) + require.Error(t, err) + assert.ErrorIs(t, err, boom) +} + +func TestRDSProber_PageCap(t *testing.T) { + // Every page returns a non-empty marker; the probe must stop after + // maxPages to bound API spend. + calls := 0 + fake := &fakeRDS{fn: func(*rds.DescribeReservedDBInstancesOfferingsInput) (*rds.DescribeReservedDBInstancesOfferingsOutput, error) { + calls++ + return &rds.DescribeReservedDBInstancesOfferingsOutput{ + Marker: aws.String("more"), + }, nil + }} + p := &RDSProber{NewClient: func(cfg aws.Config) RDSDescribeOfferings { return fake }} + _, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + assert.Equal(t, maxPages, calls) +} + +// --------------------------------------------------------------------------- +// ElastiCache +// --------------------------------------------------------------------------- + +type fakeElastiCache struct { + fn func(*elasticache.DescribeReservedCacheNodesOfferingsInput) (*elasticache.DescribeReservedCacheNodesOfferingsOutput, error) +} + +func (f *fakeElastiCache) DescribeReservedCacheNodesOfferings(ctx context.Context, in *elasticache.DescribeReservedCacheNodesOfferingsInput, _ ...func(*elasticache.Options)) (*elasticache.DescribeReservedCacheNodesOfferingsOutput, error) { + return f.fn(in) +} + +func TestElastiCacheProber_Probe(t *testing.T) { + fake := &fakeElastiCache{ + fn: func(in *elasticache.DescribeReservedCacheNodesOfferingsInput) (*elasticache.DescribeReservedCacheNodesOfferingsOutput, error) { + assert.Equal(t, probeTargetElastiCache, aws.ToString(in.CacheNodeType)) + return &elasticache.DescribeReservedCacheNodesOfferingsOutput{ + ReservedCacheNodesOfferings: []elasticachetypes.ReservedCacheNodesOffering{ + {Duration: aws.Int32(31536000), OfferingType: aws.String("All Upfront")}, + {Duration: aws.Int32(94608000), OfferingType: aws.String("No Upfront")}, + // legacy utilization-style — must be dropped + {Duration: aws.Int32(94608000), OfferingType: aws.String("Heavy Utilization")}, + }, + }, nil + }, + } + p := &ElastiCacheProber{NewClient: func(cfg aws.Config) ElastiCacheDescribeOfferings { return fake }} + assert.Equal(t, "elasticache", p.Service()) + + got, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + got = sortCombos(got) + want := []Combo{ + {Provider: "aws", Service: "elasticache", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "elasticache", TermYears: 3, Payment: "no-upfront"}, + } + assert.Equal(t, want, got) +} + +// --------------------------------------------------------------------------- +// OpenSearch +// --------------------------------------------------------------------------- + +type fakeOpenSearch struct { + fn func(*opensearch.DescribeReservedInstanceOfferingsInput) (*opensearch.DescribeReservedInstanceOfferingsOutput, error) +} + +func (f *fakeOpenSearch) DescribeReservedInstanceOfferings(ctx context.Context, in *opensearch.DescribeReservedInstanceOfferingsInput, _ ...func(*opensearch.Options)) (*opensearch.DescribeReservedInstanceOfferingsOutput, error) { + return f.fn(in) +} + +func TestOpenSearchProber_Probe(t *testing.T) { + fake := &fakeOpenSearch{ + fn: func(in *opensearch.DescribeReservedInstanceOfferingsInput) (*opensearch.DescribeReservedInstanceOfferingsOutput, error) { + return &opensearch.DescribeReservedInstanceOfferingsOutput{ + ReservedInstanceOfferings: []opensearchtypes.ReservedInstanceOffering{ + { + InstanceType: opensearchtypes.OpenSearchPartitionInstanceType(probeTargetOpenSearch), + Duration: 31536000, + PaymentOption: opensearchtypes.ReservedInstancePaymentOptionAllUpfront, + }, + { + InstanceType: opensearchtypes.OpenSearchPartitionInstanceType(probeTargetOpenSearch), + Duration: 94608000, + PaymentOption: opensearchtypes.ReservedInstancePaymentOptionPartialUpfront, + }, + // off-instance-type — must be filtered out client-side + { + InstanceType: opensearchtypes.OpenSearchPartitionInstanceType("r6g.large.search"), + Duration: 31536000, + PaymentOption: opensearchtypes.ReservedInstancePaymentOptionNoUpfront, + }, + }, + }, nil + }, + } + p := &OpenSearchProber{NewClient: func(cfg aws.Config) OpenSearchDescribeOfferings { return fake }} + assert.Equal(t, "opensearch", p.Service()) + + got, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + got = sortCombos(got) + want := []Combo{ + {Provider: "aws", Service: "opensearch", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "opensearch", TermYears: 3, Payment: "partial-upfront"}, + } + assert.Equal(t, want, got) +} + +// --------------------------------------------------------------------------- +// Redshift +// --------------------------------------------------------------------------- + +type fakeRedshift struct { + fn func(*redshift.DescribeReservedNodeOfferingsInput) (*redshift.DescribeReservedNodeOfferingsOutput, error) +} + +func (f *fakeRedshift) DescribeReservedNodeOfferings(ctx context.Context, in *redshift.DescribeReservedNodeOfferingsInput, _ ...func(*redshift.Options)) (*redshift.DescribeReservedNodeOfferingsOutput, error) { + return f.fn(in) +} + +func TestRedshiftProber_Probe(t *testing.T) { + fake := &fakeRedshift{ + fn: func(in *redshift.DescribeReservedNodeOfferingsInput) (*redshift.DescribeReservedNodeOfferingsOutput, error) { + return &redshift.DescribeReservedNodeOfferingsOutput{ + ReservedNodeOfferings: []redshifttypes.ReservedNodeOffering{ + { + NodeType: aws.String(probeTargetRedshift), + Duration: aws.Int32(31536000), + OfferingType: aws.String("All Upfront"), + }, + { + NodeType: aws.String(probeTargetRedshift), + Duration: aws.Int32(94608000), + OfferingType: aws.String("No Upfront"), + }, + // off-node-type — filtered + { + NodeType: aws.String("ra3.xlplus"), + Duration: aws.Int32(31536000), + OfferingType: aws.String("All Upfront"), + }, + }, + }, nil + }, + } + p := &RedshiftProber{NewClient: func(cfg aws.Config) RedshiftDescribeOfferings { return fake }} + assert.Equal(t, "redshift", p.Service()) + + got, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + got = sortCombos(got) + want := []Combo{ + {Provider: "aws", Service: "redshift", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "redshift", TermYears: 3, Payment: "no-upfront"}, + } + assert.Equal(t, want, got) +} + +// --------------------------------------------------------------------------- +// MemoryDB +// --------------------------------------------------------------------------- + +type fakeMemoryDB struct { + fn func(*memorydb.DescribeReservedNodesOfferingsInput) (*memorydb.DescribeReservedNodesOfferingsOutput, error) +} + +func (f *fakeMemoryDB) DescribeReservedNodesOfferings(ctx context.Context, in *memorydb.DescribeReservedNodesOfferingsInput, _ ...func(*memorydb.Options)) (*memorydb.DescribeReservedNodesOfferingsOutput, error) { + return f.fn(in) +} + +func TestMemoryDBProber_Probe(t *testing.T) { + fake := &fakeMemoryDB{ + fn: func(in *memorydb.DescribeReservedNodesOfferingsInput) (*memorydb.DescribeReservedNodesOfferingsOutput, error) { + assert.Equal(t, probeTargetMemoryDB, aws.ToString(in.NodeType)) + return &memorydb.DescribeReservedNodesOfferingsOutput{ + ReservedNodesOfferings: []memorydbtypes.ReservedNodesOffering{ + {Duration: 31536000, OfferingType: aws.String("All Upfront")}, + {Duration: 94608000, OfferingType: aws.String("Partial Upfront")}, + // anomaly: 18-month duration + {Duration: int32(18 * 30 * 86400), OfferingType: aws.String("All Upfront")}, + }, + }, nil + }, + } + p := &MemoryDBProber{NewClient: func(cfg aws.Config) MemoryDBDescribeOfferings { return fake }} + assert.Equal(t, "memorydb", p.Service()) + + got, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + got = sortCombos(got) + want := []Combo{ + {Provider: "aws", Service: "memorydb", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "memorydb", TermYears: 3, Payment: "partial-upfront"}, + } + assert.Equal(t, want, got) +} + +// --------------------------------------------------------------------------- +// EC2 +// --------------------------------------------------------------------------- + +type fakeEC2 struct { + fn func(*ec2.DescribeReservedInstancesOfferingsInput) (*ec2.DescribeReservedInstancesOfferingsOutput, error) +} + +func (f *fakeEC2) DescribeReservedInstancesOfferings(ctx context.Context, in *ec2.DescribeReservedInstancesOfferingsInput, _ ...func(*ec2.Options)) (*ec2.DescribeReservedInstancesOfferingsOutput, error) { + return f.fn(in) +} + +func TestEC2Prober_Probe(t *testing.T) { + fake := &fakeEC2{ + fn: func(in *ec2.DescribeReservedInstancesOfferingsInput) (*ec2.DescribeReservedInstancesOfferingsOutput, error) { + assert.Equal(t, ec2types.InstanceType(probeTargetEC2), in.InstanceType) + require.NotNil(t, in.IncludeMarketplace) + assert.False(t, aws.ToBool(in.IncludeMarketplace)) + return &ec2.DescribeReservedInstancesOfferingsOutput{ + ReservedInstancesOfferings: []ec2types.ReservedInstancesOffering{ + {Duration: aws.Int64(31536000), OfferingType: ec2types.OfferingTypeValuesAllUpfront}, + {Duration: aws.Int64(31536000), OfferingType: ec2types.OfferingTypeValuesPartialUpfront}, + {Duration: aws.Int64(31536000), OfferingType: ec2types.OfferingTypeValuesNoUpfront}, + {Duration: aws.Int64(94608000), OfferingType: ec2types.OfferingTypeValuesAllUpfront}, + // legacy pre-2011 utilization — must be dropped + {Duration: aws.Int64(31536000), OfferingType: ec2types.OfferingTypeValuesHeavyUtilization}, + {Duration: aws.Int64(31536000), OfferingType: ec2types.OfferingTypeValuesMediumUtilization}, + {Duration: aws.Int64(31536000), OfferingType: ec2types.OfferingTypeValuesLightUtilization}, + }, + }, nil + }, + } + p := &EC2Prober{NewClient: func(cfg aws.Config) EC2DescribeOfferings { return fake }} + assert.Equal(t, "ec2", p.Service()) + + got, err := p.Probe(context.Background(), aws.Config{}) + require.NoError(t, err) + got = sortCombos(got) + want := []Combo{ + {Provider: "aws", Service: "ec2", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "ec2", TermYears: 1, Payment: "no-upfront"}, + {Provider: "aws", Service: "ec2", TermYears: 1, Payment: "partial-upfront"}, + {Provider: "aws", Service: "ec2", TermYears: 3, Payment: "all-upfront"}, + } + assert.Equal(t, want, got) +} + +func TestDefaultProbers(t *testing.T) { + probers := DefaultProbers() + services := make([]string, 0, len(probers)) + for _, p := range probers { + services = append(services, p.Service()) + } + sort.Strings(services) + assert.Equal(t, []string{"ec2", "elasticache", "memorydb", "opensearch", "rds", "redshift"}, services) +} diff --git a/internal/commitmentopts/service.go b/internal/commitmentopts/service.go new file mode 100644 index 00000000..d85ce966 --- /dev/null +++ b/internal/commitmentopts/service.go @@ -0,0 +1,194 @@ +package commitmentopts + +import ( + "context" + "fmt" + "sync" + + "github.com/LeanerCloud/CUDly/internal/config" + "github.com/LeanerCloud/CUDly/pkg/logging" + "github.com/aws/aws-sdk-go-v2/aws" + "golang.org/x/sync/errgroup" +) + +// BuildConfigFn resolves an aws.Config for a given CloudAccount. The real +// wiring uses credentials.ResolveAWSCredentialProvider; tests substitute a +// stub that returns a zero aws.Config. +type BuildConfigFn func(ctx context.Context, account *config.CloudAccount) (aws.Config, error) + +// Service orchestrates probe-and-cache of AWS commitment options. +// +// Concurrency model: +// - Get is DB-first. The DB lookup is lock-free. +// - On a cache miss, Get acquires mu, re-checks the DB under the lock +// (to absorb a concurrent writer), then probes. The lock is per- +// process; the singleton PK on commitment_options_probe_runs handles +// cross-process races. +// - Probes run concurrently under errgroup. Any probe error aborts the +// whole attempt and NOTHING is persisted — we never want a partial +// snapshot to set the "cache warm" sentinel. +type Service struct { + store Store + accounts AccountLister + buildConfig BuildConfigFn + probers []Prober + mu sync.Mutex +} + +// New returns a Service wired to the given dependencies. Pass +// DefaultProbers() for the real probe set; tests inject stubs. +func New(store Store, accounts AccountLister, buildConfig BuildConfigFn, probers []Prober) *Service { + return &Service{ + store: store, + accounts: accounts, + buildConfig: buildConfig, + probers: probers, + } +} + +// Get returns cached Options if they exist, otherwise probes. On any +// failure (no AWS account connected, probe error) it returns ErrNoData +// without persisting; callers map that to the unavailable status. +func (s *Service) Get(ctx context.Context) (Options, error) { + // Fast path: probe run already persisted. + if opts, ok, err := s.store.Get(ctx); err != nil { + return nil, fmt.Errorf("read commitmentopts store: %w", err) + } else if ok { + return opts, nil + } + + s.mu.Lock() + defer s.mu.Unlock() + + // Re-check under the lock — a peer goroutine may have just persisted. + if opts, ok, err := s.store.Get(ctx); err != nil { + return nil, fmt.Errorf("read commitmentopts store: %w", err) + } else if ok { + return opts, nil + } + + return s.probeAndPersist(ctx) +} + +// probeAndPersist runs every configured Prober against the first enabled +// AWS account. Returns ErrNoData (WITHOUT persisting) if any step fails. +func (s *Service) probeAndPersist(ctx context.Context) (Options, error) { + account, err := s.findAWSAccount(ctx) + if err != nil { + return nil, err + } + if account == nil { + return nil, ErrNoData + } + + cfg, err := s.buildConfig(ctx, account) + if err != nil { + logging.Warnf("commitmentopts: probe aborted — build config account=%s: %v", account.ID, err) + return nil, ErrNoData + } + + // Fan out: one probe per service, first error wins. + results := make([][]Combo, len(s.probers)) + group, gctx := errgroup.WithContext(ctx) + for i, p := range s.probers { + i, p := i, p + group.Go(func() error { + combos, err := p.Probe(gctx, cfg) + if err != nil { + return fmt.Errorf("probe %s: %w", p.Service(), err) + } + results[i] = combos + return nil + }) + } + if err := group.Wait(); err != nil { + // All-or-nothing: a single service failing must NOT produce a + // half-populated cache that subsequent Get calls would trust. + // Log the first error so an operator can diagnose missing IAM + // permissions or SDK regressions without tailing per-service logs. + logging.Warnf("commitmentopts: probe aborted — account=%s: %v", account.ID, err) + return nil, ErrNoData + } + + var all []Combo + for _, r := range results { + all = append(all, r...) + } + + if err := s.store.Save(ctx, all, account.ExternalID); err != nil { + return nil, fmt.Errorf("persist commitment options: %w", err) + } + + return buildOptions(all), nil +} + +// findAWSAccount returns the first enabled AWS account, or nil if none +// exist. Nil + nil error is the "no AWS connected" signal. +func (s *Service) findAWSAccount(ctx context.Context) (*config.CloudAccount, error) { + provider := "aws" + accounts, err := s.accounts.ListCloudAccounts(ctx, config.CloudAccountFilter{Provider: &provider}) + if err != nil { + return nil, fmt.Errorf("list cloud accounts: %w", err) + } + for i := range accounts { + if accounts[i].Provider == "aws" { + return &accounts[i], nil + } + } + return nil, nil +} + +// Validate reports whether (provider, service, term, payment) is a legal +// combination according to the cached probe data. +// +// Fallback behaviour: if no probe data exists (the server has never +// successfully probed) Validate returns true so saves aren't blocked when +// we can't verify. The frontend's hardcoded rules are the user-facing +// gate; this check is belt-and-braces. +func (s *Service) Validate(ctx context.Context, provider, service string, term int, payment string) (bool, error) { + opts, err := s.Get(ctx) + if err != nil { + if err == ErrNoData { + return true, nil + } + return false, err + } + + byService, ok := opts[provider] + if !ok { + // No combos for this provider at all — nothing to validate + // against. Permissive fallback keeps parity with ErrNoData. + return true, nil + } + combos, ok := byService[service] + if !ok { + // We have data for this provider but not this service. The + // service is not commitment-capable in our probe set (e.g. + // Savings Plans) — permissive fallback. + return true, nil + } + for _, c := range combos { + if c.TermYears == term && c.Payment == payment { + return true, nil + } + } + return false, nil +} + +// buildOptions groups a flat Combo slice into the nested Options map the +// API handler returns. +func buildOptions(combos []Combo) Options { + if len(combos) == 0 { + return Options{} + } + out := make(Options) + for _, c := range combos { + byService := out[c.Provider] + if byService == nil { + byService = make(map[string][]Combo) + out[c.Provider] = byService + } + byService[c.Service] = append(byService[c.Service], c) + } + return out +} diff --git a/internal/commitmentopts/service_test.go b/internal/commitmentopts/service_test.go new file mode 100644 index 00000000..ab25aee0 --- /dev/null +++ b/internal/commitmentopts/service_test.go @@ -0,0 +1,286 @@ +package commitmentopts + +import ( + "context" + "errors" + "sync" + "sync/atomic" + "testing" + + "github.com/LeanerCloud/CUDly/internal/config" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// fakeStore is a memory-backed Store used throughout service_test.go. It +// tracks call counts so tests can assert the Save-once invariant. +type fakeStore struct { + mu sync.Mutex + opts Options + has bool + saves int32 + saveErr error + getErr error + hasErr error + savedID string + savedCnt int + saveHook func([]Combo, string) +} + +func (f *fakeStore) Get(ctx context.Context) (Options, bool, error) { + if f.getErr != nil { + return nil, false, f.getErr + } + f.mu.Lock() + defer f.mu.Unlock() + if !f.has { + return nil, false, nil + } + return f.opts, true, nil +} + +func (f *fakeStore) Save(ctx context.Context, combos []Combo, sourceAccountID string) error { + if f.saveErr != nil { + return f.saveErr + } + atomic.AddInt32(&f.saves, 1) + f.mu.Lock() + defer f.mu.Unlock() + f.opts = buildOptions(combos) + f.has = true + f.savedID = sourceAccountID + f.savedCnt = len(combos) + if f.saveHook != nil { + f.saveHook(combos, sourceAccountID) + } + return nil +} + +func (f *fakeStore) HasData(ctx context.Context) (bool, error) { + if f.hasErr != nil { + return false, f.hasErr + } + f.mu.Lock() + defer f.mu.Unlock() + return f.has, nil +} + +// fakeAccounts returns a fixed list. +type fakeAccounts struct { + accounts []config.CloudAccount + err error +} + +func (f *fakeAccounts) ListCloudAccounts(ctx context.Context, filter config.CloudAccountFilter) ([]config.CloudAccount, error) { + if f.err != nil { + return nil, f.err + } + return f.accounts, nil +} + +// stubProber returns a fixed set of combos (or an error). +type stubProber struct { + name string + combos []Combo + err error +} + +func (s *stubProber) Service() string { return s.name } +func (s *stubProber) Probe(ctx context.Context, _ aws.Config) ([]Combo, error) { + if s.err != nil { + return nil, s.err + } + return s.combos, nil +} + +func noopBuildConfig(ctx context.Context, _ *config.CloudAccount) (aws.Config, error) { + return aws.Config{}, nil +} + +func awsAccount(id string) config.CloudAccount { + return config.CloudAccount{ID: id, Provider: "aws", ExternalID: id, Enabled: true} +} + +// --------------------------------------------------------------------------- + +func TestService_Get_DBHitShortCircuits(t *testing.T) { + // Seed the store as if a prior probe already landed. The service must + // never invoke the probers. + cached := Options{"aws": {"rds": {{Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}}}} + store := &fakeStore{opts: cached, has: true} + + probed := false + probers := []Prober{&stubProber{name: "rds", combos: []Combo{{Provider: "aws", Service: "rds", TermYears: 3, Payment: "all-upfront"}}}} + // Wrap to detect any call. + wrapped := []Prober{proberFunc{name: "rds", fn: func() ([]Combo, error) { + probed = true + return probers[0].(*stubProber).combos, nil + }}} + + accounts := &fakeAccounts{accounts: []config.CloudAccount{awsAccount("acct")}} + svc := New(store, accounts, noopBuildConfig, wrapped) + + got, err := svc.Get(context.Background()) + require.NoError(t, err) + assert.Equal(t, cached, got) + assert.False(t, probed, "probers must not run when the DB is already warm") + assert.Zero(t, atomic.LoadInt32(&store.saves)) +} + +func TestService_Get_NoAWSAccountReturnsErrNoData(t *testing.T) { + store := &fakeStore{} + accounts := &fakeAccounts{accounts: nil} + svc := New(store, accounts, noopBuildConfig, []Prober{&stubProber{name: "rds"}}) + + _, err := svc.Get(context.Background()) + assert.ErrorIs(t, err, ErrNoData) + assert.Zero(t, atomic.LoadInt32(&store.saves)) +} + +func TestService_Get_AllProbesOKPersistsAndReturns(t *testing.T) { + store := &fakeStore{} + accounts := &fakeAccounts{accounts: []config.CloudAccount{awsAccount("123456789012")}} + probers := []Prober{ + &stubProber{name: "rds", combos: []Combo{{Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}}}, + &stubProber{name: "elasticache", combos: []Combo{{Provider: "aws", Service: "elasticache", TermYears: 3, Payment: "no-upfront"}}}, + } + svc := New(store, accounts, noopBuildConfig, probers) + + got, err := svc.Get(context.Background()) + require.NoError(t, err) + require.Contains(t, got, "aws") + assert.Len(t, got["aws"]["rds"], 1) + assert.Len(t, got["aws"]["elasticache"], 1) + assert.Equal(t, int32(1), atomic.LoadInt32(&store.saves)) + assert.Equal(t, "123456789012", store.savedID) + assert.Equal(t, 2, store.savedCnt) +} + +func TestService_Get_OneProberFailsDoesNotPersist(t *testing.T) { + store := &fakeStore{} + accounts := &fakeAccounts{accounts: []config.CloudAccount{awsAccount("a")}} + boom := errors.New("boom") + probers := []Prober{ + &stubProber{name: "rds", combos: []Combo{{Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}}}, + &stubProber{name: "elasticache", err: boom}, + } + svc := New(store, accounts, noopBuildConfig, probers) + + _, err := svc.Get(context.Background()) + assert.ErrorIs(t, err, ErrNoData) + assert.Zero(t, atomic.LoadInt32(&store.saves), "partial probe results must NOT be persisted") +} + +func TestService_Get_BuildConfigErrorReturnsErrNoData(t *testing.T) { + store := &fakeStore{} + accounts := &fakeAccounts{accounts: []config.CloudAccount{awsAccount("a")}} + boom := errors.New("creds") + svc := New(store, accounts, func(context.Context, *config.CloudAccount) (aws.Config, error) { + return aws.Config{}, boom + }, []Prober{&stubProber{name: "rds"}}) + + _, err := svc.Get(context.Background()) + assert.ErrorIs(t, err, ErrNoData) + assert.Zero(t, atomic.LoadInt32(&store.saves)) +} + +func TestService_Get_StoreReadErrorBubbles(t *testing.T) { + boom := errors.New("db down") + store := &fakeStore{getErr: boom} + accounts := &fakeAccounts{accounts: []config.CloudAccount{awsAccount("a")}} + svc := New(store, accounts, noopBuildConfig, []Prober{&stubProber{name: "rds"}}) + + _, err := svc.Get(context.Background()) + require.Error(t, err) + assert.ErrorIs(t, err, boom) +} + +func TestService_Validate_PermissiveOnErrNoData(t *testing.T) { + // No AWS account → Get returns ErrNoData → Validate must return true. + store := &fakeStore{} + accounts := &fakeAccounts{accounts: nil} + svc := New(store, accounts, noopBuildConfig, nil) + + ok, err := svc.Validate(context.Background(), "aws", "rds", 1, "all-upfront") + require.NoError(t, err) + assert.True(t, ok, "missing data must not block saves") +} + +func TestService_Validate_HitAndMiss(t *testing.T) { + cached := Options{"aws": {"rds": { + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 3, Payment: "no-upfront"}, + }}} + store := &fakeStore{opts: cached, has: true} + accounts := &fakeAccounts{} + svc := New(store, accounts, noopBuildConfig, nil) + + ok, err := svc.Validate(context.Background(), "aws", "rds", 1, "all-upfront") + require.NoError(t, err) + assert.True(t, ok) + + ok, err = svc.Validate(context.Background(), "aws", "rds", 3, "partial-upfront") + require.NoError(t, err) + assert.False(t, ok) +} + +func TestService_Validate_UnknownProviderPermissive(t *testing.T) { + cached := Options{"aws": {"rds": {{Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}}}} + store := &fakeStore{opts: cached, has: true} + svc := New(store, &fakeAccounts{}, noopBuildConfig, nil) + + // Azure isn't in our probe set — don't block. + ok, err := svc.Validate(context.Background(), "azure", "vm", 1, "all-upfront") + require.NoError(t, err) + assert.True(t, ok) +} + +func TestService_Validate_UnknownServiceUnderKnownProviderPermissive(t *testing.T) { + cached := Options{"aws": {"rds": {{Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}}}} + store := &fakeStore{opts: cached, has: true} + svc := New(store, &fakeAccounts{}, noopBuildConfig, nil) + + // savingsplans has no probe — don't block. + ok, err := svc.Validate(context.Background(), "aws", "savingsplans", 1, "all-upfront") + require.NoError(t, err) + assert.True(t, ok) +} + +func TestService_Get_ConcurrentCallersProbeOnce(t *testing.T) { + // Serialize probes via the mutex: N concurrent Get()s on a cold store + // must result in exactly one Save. + store := &fakeStore{} + accounts := &fakeAccounts{accounts: []config.CloudAccount{awsAccount("a")}} + + var probeCount int32 + probers := []Prober{proberFunc{name: "rds", fn: func() ([]Combo, error) { + atomic.AddInt32(&probeCount, 1) + return []Combo{{Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}}, nil + }}} + svc := New(store, accounts, noopBuildConfig, probers) + + var wg sync.WaitGroup + for i := 0; i < 8; i++ { + wg.Add(1) + go func() { + defer wg.Done() + _, _ = svc.Get(context.Background()) + }() + } + wg.Wait() + + assert.Equal(t, int32(1), atomic.LoadInt32(&store.saves), "Save must run exactly once") + assert.Equal(t, int32(1), atomic.LoadInt32(&probeCount), "Probe must run exactly once for 8 concurrent callers") +} + +// proberFunc is a tiny adapter letting tests wire a closure as a Prober. +type proberFunc struct { + name string + fn func() ([]Combo, error) +} + +func (p proberFunc) Service() string { return p.name } +func (p proberFunc) Probe(ctx context.Context, _ aws.Config) ([]Combo, error) { + return p.fn() +} diff --git a/internal/commitmentopts/store_postgres.go b/internal/commitmentopts/store_postgres.go new file mode 100644 index 00000000..9d166609 --- /dev/null +++ b/internal/commitmentopts/store_postgres.go @@ -0,0 +1,125 @@ +package commitmentopts + +import ( + "context" + "fmt" + + "github.com/LeanerCloud/CUDly/internal/database" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" +) + +// dbConn is the minimal pgx-style surface PostgresStore uses. Both +// *database.Connection and the pgxmock mocks satisfy it. Mirrors the +// interface shape in internal/config/store_postgres.go so the two stores +// can share test-container scaffolding. +type dbConn interface { + QueryRow(ctx context.Context, sql string, args ...any) pgx.Row + Query(ctx context.Context, sql string, args ...any) (pgx.Rows, error) + Exec(ctx context.Context, sql string, args ...any) (pgconn.CommandTag, error) + Begin(ctx context.Context) (pgx.Tx, error) +} + +// PostgresStore is the Postgres-backed commitmentopts.Store implementation. +type PostgresStore struct { + db dbConn +} + +// NewPostgresStore returns a store backed by the given connection. The +// connection must already have the 000039 migration applied. +func NewPostgresStore(db *database.Connection) *PostgresStore { + return &PostgresStore{db: db} +} + +// Verify PostgresStore implements Store. +var _ Store = (*PostgresStore)(nil) + +// Get returns every persisted combo grouped by provider and service, +// plus a boolean that is true iff a probe run row exists. The combos map +// may be empty even when the boolean is true — that legitimately means +// "we probed and nothing matched our normalizer". +func (s *PostgresStore) Get(ctx context.Context) (Options, bool, error) { + hasData, err := s.HasData(ctx) + if err != nil { + return nil, false, err + } + if !hasData { + return nil, false, nil + } + + rows, err := s.db.Query(ctx, + `SELECT provider, service, term_years, payment_option FROM commitment_options_combos`, + ) + if err != nil { + return nil, false, fmt.Errorf("query commitment_options_combos: %w", err) + } + defer rows.Close() + + opts := make(Options) + for rows.Next() { + var c Combo + if err := rows.Scan(&c.Provider, &c.Service, &c.TermYears, &c.Payment); err != nil { + return nil, false, fmt.Errorf("scan commitment_options_combos row: %w", err) + } + byService := opts[c.Provider] + if byService == nil { + byService = make(map[string][]Combo) + opts[c.Provider] = byService + } + byService[c.Service] = append(byService[c.Service], c) + } + if err := rows.Err(); err != nil { + return nil, false, fmt.Errorf("iterate commitment_options_combos: %w", err) + } + return opts, true, nil +} + +// HasData reports whether a probe run row exists. +func (s *PostgresStore) HasData(ctx context.Context) (bool, error) { + var exists bool + if err := s.db.QueryRow(ctx, + `SELECT EXISTS(SELECT 1 FROM commitment_options_probe_runs)`, + ).Scan(&exists); err != nil { + return false, fmt.Errorf("check commitment_options_probe_runs: %w", err) + } + return exists, nil +} + +// Save persists the probe run row and every combo transactionally. If a +// run row already exists (a second process raced us to persist), Save is +// a no-op — the singleton PK means only the first writer wins. Combo +// inserts use ON CONFLICT DO NOTHING so re-running the probe after a +// manual row clear and partial repopulation doesn't fail. +func (s *PostgresStore) Save(ctx context.Context, combos []Combo, sourceAccountID string) error { + tx, err := s.db.Begin(ctx) + if err != nil { + return fmt.Errorf("begin tx: %w", err) + } + // Rollback is a no-op after a successful Commit. + defer func() { _ = tx.Rollback(ctx) }() + + if _, err := tx.Exec(ctx, + `INSERT INTO commitment_options_probe_runs (singleton, probed_at, source_account_id) + VALUES (TRUE, NOW(), $1) + ON CONFLICT (singleton) DO NOTHING`, + sourceAccountID, + ); err != nil { + return fmt.Errorf("insert commitment_options_probe_runs: %w", err) + } + + for _, c := range combos { + if _, err := tx.Exec(ctx, + `INSERT INTO commitment_options_combos (provider, service, term_years, payment_option) + VALUES ($1, $2, $3, $4) + ON CONFLICT DO NOTHING`, + c.Provider, c.Service, c.TermYears, c.Payment, + ); err != nil { + return fmt.Errorf("insert commitment_options_combos: %w", err) + } + } + + if err := tx.Commit(ctx); err != nil { + return fmt.Errorf("commit tx: %w", err) + } + return nil +} diff --git a/internal/commitmentopts/store_postgres_test.go b/internal/commitmentopts/store_postgres_test.go new file mode 100644 index 00000000..7a6d1a7c --- /dev/null +++ b/internal/commitmentopts/store_postgres_test.go @@ -0,0 +1,134 @@ +//go:build integration +// +build integration + +package commitmentopts_test + +import ( + "context" + "path/filepath" + "runtime" + "sort" + "testing" + + "github.com/LeanerCloud/CUDly/internal/commitmentopts" + "github.com/LeanerCloud/CUDly/internal/database/postgres/migrations" + "github.com/LeanerCloud/CUDly/internal/database/postgres/testhelpers" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// getMigrationsPath mirrors the helper in internal/config/store_postgres_test.go — +// the migrations directory is a sibling of internal//. +func getMigrationsPath() string { + _, filename, _, _ := runtime.Caller(0) + return filepath.Join(filepath.Dir(filename), "..", "database", "postgres", "migrations") +} + +// setupStore stands up a fresh Postgres container and returns a store +// bound to it. The container is torn down via t.Cleanup. +func setupStore(ctx context.Context, t *testing.T) *commitmentopts.PostgresStore { + t.Helper() + + container, err := testhelpers.SetupPostgresContainer(ctx, t) + require.NoError(t, err) + t.Cleanup(func() { container.Cleanup(ctx) }) + + require.NoError(t, migrations.RunMigrations(ctx, container.DB.Pool(), getMigrationsPath(), "", "")) + return commitmentopts.NewPostgresStore(container.DB) +} + +func TestPostgresStore_HasData_EmptyIsFalse(t *testing.T) { + ctx := context.Background() + store := setupStore(ctx, t) + + has, err := store.HasData(ctx) + require.NoError(t, err) + assert.False(t, has) +} + +func TestPostgresStore_Get_EmptyReturnsFalseBool(t *testing.T) { + ctx := context.Background() + store := setupStore(ctx, t) + + opts, has, err := store.Get(ctx) + require.NoError(t, err) + assert.False(t, has) + assert.Empty(t, opts) +} + +func TestPostgresStore_SaveAndGet(t *testing.T) { + ctx := context.Background() + store := setupStore(ctx, t) + + combos := []commitmentopts.Combo{ + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "no-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 3, Payment: "all-upfront"}, + {Provider: "aws", Service: "elasticache", TermYears: 1, Payment: "partial-upfront"}, + } + require.NoError(t, store.Save(ctx, combos, "123456789012")) + + has, err := store.HasData(ctx) + require.NoError(t, err) + assert.True(t, has) + + opts, has, err := store.Get(ctx) + require.NoError(t, err) + assert.True(t, has) + require.Contains(t, opts, "aws") + require.Contains(t, opts["aws"], "rds") + require.Contains(t, opts["aws"], "elasticache") + + rds := opts["aws"]["rds"] + sort.Slice(rds, func(i, j int) bool { + if rds[i].TermYears != rds[j].TermYears { + return rds[i].TermYears < rds[j].TermYears + } + return rds[i].Payment < rds[j].Payment + }) + assert.Equal(t, []commitmentopts.Combo{ + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "no-upfront"}, + {Provider: "aws", Service: "rds", TermYears: 3, Payment: "all-upfront"}, + }, rds) + + assert.Equal(t, []commitmentopts.Combo{ + {Provider: "aws", Service: "elasticache", TermYears: 1, Payment: "partial-upfront"}, + }, opts["aws"]["elasticache"]) +} + +func TestPostgresStore_Save_IdempotentOnRerun(t *testing.T) { + ctx := context.Background() + store := setupStore(ctx, t) + + combos := []commitmentopts.Combo{ + {Provider: "aws", Service: "rds", TermYears: 1, Payment: "all-upfront"}, + } + require.NoError(t, store.Save(ctx, combos, "111111111111")) + // Second call must not fail (singleton PK conflict on the run row, + // compound PK conflict on the combos row). + require.NoError(t, store.Save(ctx, combos, "222222222222")) + + opts, has, err := store.Get(ctx) + require.NoError(t, err) + assert.True(t, has) + assert.Equal(t, combos, opts["aws"]["rds"]) +} + +func TestPostgresStore_Save_EmptyCombosStillMarksRun(t *testing.T) { + ctx := context.Background() + store := setupStore(ctx, t) + + // A legitimate "AWS returned no matching offerings" scenario — we + // persist the run row so the next Get short-circuits via the bool. + require.NoError(t, store.Save(ctx, nil, "333333333333")) + + has, err := store.HasData(ctx) + require.NoError(t, err) + assert.True(t, has) + + opts, has, err := store.Get(ctx) + require.NoError(t, err) + assert.True(t, has) + assert.Empty(t, opts) +} diff --git a/internal/commitmentopts/types.go b/internal/commitmentopts/types.go new file mode 100644 index 00000000..abe06e87 --- /dev/null +++ b/internal/commitmentopts/types.go @@ -0,0 +1,86 @@ +// Package commitmentopts discovers which (term, payment) commitment tuples +// each AWS commitment-capable service actually sells, persists the result, +// and exposes it to the API layer so the frontend can hide impossible +// combinations instead of hardcoding them. +// +// The package never blocks a save on missing data: if no probe has been +// persisted yet (or the probe failed), Service.Validate falls back to +// permissive-true and the frontend's hardcoded rules still gate the UI. +package commitmentopts + +import ( + "context" + "errors" + + "github.com/LeanerCloud/CUDly/internal/config" + "github.com/aws/aws-sdk-go-v2/aws" +) + +// Combo is one (provider, service, term, payment) tuple harvested from the +// reserved-offerings APIs. TermYears is 1 or 3; Payment is one of +// "all-upfront", "partial-upfront", "no-upfront". Other durations and legacy +// utilization-style payment options are dropped during normalization. +type Combo struct { + Provider string + Service string + TermYears int + Payment string +} + +// Options groups valid combos by provider and service. Shape: +// +// Options["aws"]["rds"] = []Combo{...} +// +// Only AWS is ever populated today — Azure and GCP stay hardcoded in the +// frontend because their commitment APIs don't have an equivalent probe. +type Options map[string]map[string][]Combo + +// Prober probes a single AWS service's reserved-offerings API and returns +// every unique (term, payment) tuple it exposes. Implementations live in +// probe.go, one per service. +type Prober interface { + // Service returns the canonical service name used as the Options key + // and persisted in commitment_options_combos.service. + Service() string + + // Probe issues paginated Describe*Offerings calls against a canonical + // small instance type, normalizes the results, and returns unique + // Combos. Errors bubble up — the orchestrating Service treats ANY + // probe failure as "don't persist" (all-or-nothing). + Probe(ctx context.Context, cfg aws.Config) ([]Combo, error) +} + +// Store persists probe results. Implementations must treat the combos table +// as idempotent (ON CONFLICT DO NOTHING) so concurrent writers don't trip +// each other. +type Store interface { + // Get returns the cached Options and a boolean indicating whether a + // probe run row exists. The boolean is the authoritative "cache warm" + // signal — an empty combos slice plus a present run row means "we + // probed, AWS returned nothing matching our normalizer", and callers + // should NOT re-probe. + Get(ctx context.Context) (Options, bool, error) + + // Save writes the probe run row and all combos atomically. If a run + // row already exists (concurrent writer won the race), Save is a + // no-op rather than an error. + Save(ctx context.Context, combos []Combo, sourceAccountID string) error + + // HasData reports whether a probe run row exists. Used as a cheap + // pre-check before acquiring the prober lock. + HasData(ctx context.Context) (bool, error) +} + +// AccountLister is the narrow subset of config.StoreInterface that Service +// needs. Defining it here (rather than depending on the full interface) +// keeps tests small and avoids a circular import when the API layer wires +// up the real store. +type AccountLister interface { + ListCloudAccounts(ctx context.Context, filter config.CloudAccountFilter) ([]config.CloudAccount, error) +} + +// ErrNoData signals "we don't have cached commitment options and can't +// produce them right now" (no AWS account connected, or a probe failed). +// Callers MUST NOT treat this as a hard error — the API handler maps it to +// `{"status":"unavailable"}` and Validate maps it to permissive-true. +var ErrNoData = errors.New("commitment options unavailable") diff --git a/internal/database/postgres/migrations/000039_commitment_options_cache.down.sql b/internal/database/postgres/migrations/000039_commitment_options_cache.down.sql new file mode 100644 index 00000000..fd7d6bb3 --- /dev/null +++ b/internal/database/postgres/migrations/000039_commitment_options_cache.down.sql @@ -0,0 +1,2 @@ +DROP TABLE IF EXISTS commitment_options_combos; +DROP TABLE IF EXISTS commitment_options_probe_runs; diff --git a/internal/database/postgres/migrations/000039_commitment_options_cache.up.sql b/internal/database/postgres/migrations/000039_commitment_options_cache.up.sql new file mode 100644 index 00000000..7d09fdfb --- /dev/null +++ b/internal/database/postgres/migrations/000039_commitment_options_cache.up.sql @@ -0,0 +1,23 @@ +-- commitment_options_probe_runs is a singleton table: one row per +-- successful probe of the AWS reserved-offerings APIs. Its presence is +-- the "is the cache warm?" sentinel. An empty table means the backend +-- has never persisted a probe (or the admin cleared it to force a +-- refresh). A non-empty table plus an empty commitment_options_combos +-- means "we probed and AWS genuinely has no matching offerings" — the +-- frontend will fall back to its hardcoded defaults in that case. +CREATE TABLE IF NOT EXISTS commitment_options_probe_runs ( + singleton BOOLEAN PRIMARY KEY DEFAULT TRUE CHECK (singleton), + probed_at TIMESTAMPTZ NOT NULL, + source_account_id TEXT NOT NULL +); + +-- commitment_options_combos records each supported (provider, service, +-- term, payment) tuple harvested from the reserved-offerings APIs. The +-- compound PK makes re-persists idempotent under ON CONFLICT DO NOTHING. +CREATE TABLE IF NOT EXISTS commitment_options_combos ( + provider TEXT NOT NULL, + service TEXT NOT NULL, + term_years INT NOT NULL, + payment_option TEXT NOT NULL, + PRIMARY KEY (provider, service, term_years, payment_option) +); diff --git a/internal/server/app.go b/internal/server/app.go index ec77d443..570fb514 100644 --- a/internal/server/app.go +++ b/internal/server/app.go @@ -15,6 +15,7 @@ import ( "github.com/LeanerCloud/CUDly/internal/analytics" "github.com/LeanerCloud/CUDly/internal/api" "github.com/LeanerCloud/CUDly/internal/auth" + "github.com/LeanerCloud/CUDly/internal/commitmentopts" "github.com/LeanerCloud/CUDly/internal/config" "github.com/LeanerCloud/CUDly/internal/credentials" "github.com/LeanerCloud/CUDly/internal/database" @@ -26,6 +27,7 @@ import ( "github.com/LeanerCloud/CUDly/internal/scheduler" "github.com/LeanerCloud/CUDly/internal/secrets" "github.com/LeanerCloud/CUDly/pkg/logging" + "github.com/aws/aws-sdk-go-v2/aws" awsconfig "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/service/sts" "github.com/jackc/pgx/v5/pgxpool" @@ -583,6 +585,36 @@ func (app *Application) reinitializeAfterConnect(dbConn *database.Connection) er IsLambda: app.appConfig.IsLambda, }) + // Build the commitment-options probe/cache service. It lazily probes + // AWS reserved-offerings APIs through the first enabled AWS account and + // persists the result so subsequent calls come from the DB. Failures + // anywhere along the chain (no account connected, probe denied) collapse + // to ErrNoData; the API handler and save-side validator both degrade + // gracefully, so this is wired unconditionally. + commitmentOpts := commitmentopts.New( + commitmentopts.NewPostgresStore(dbConn), + app.Config, + func(ctx context.Context, acct *config.CloudAccount) (aws.Config, error) { + stsClient := sts.NewFromConfig(awsCfg) + prov, err := credentials.ResolveAWSCredentialProvider(ctx, acct, credStore, stsClient) + if err != nil { + return aws.Config{}, err + } + // us-east-1 hardcoded because reserved offerings are global + // facts (not AZ-scoped), and us-east-1 has the widest + // instance-type coverage. This fails silently for GovCloud + // / China-partition accounts — those return ErrNoData from + // the probe and the frontend falls back to hardcoded rules, + // which is acceptable since those partitions rarely need + // dynamic commitment detection. + return awsconfig.LoadDefaultConfig(ctx, + awsconfig.WithCredentialsProvider(prov), + awsconfig.WithRegion("us-east-1"), + ) + }, + commitmentopts.DefaultProbers(), + ) + // Update API handler with new config store, scheduler, and rate limiter. // AnalyticsClient is Postgres-backed (see api.NewPostgresAnalyticsClient) — // it aggregates purchase_history on demand so the History UI charts work @@ -603,6 +635,7 @@ func (app *Application) reinitializeAfterConnect(dbConn *database.Connection) er AnalyticsClient: api.NewPostgresAnalyticsClient(dbConn), OIDCSigner: app.signer, OIDCIssuerURL: resolveOIDCIssuerURL(app.appConfig), + CommitmentOpts: commitmentOpts, }) if app.API == nil { return fmt.Errorf("failed to create API handler")