From 696cf5dcc93c462d594b1330e4c385c1432dd04c Mon Sep 17 00:00:00 2001 From: Cristian Magherusan-Stanciu Date: Sun, 3 May 2026 13:24:20 +0200 Subject: [PATCH] =?UTF-8?q?test(api):=20pin=20reshape=20security=20regress?= =?UTF-8?q?ions=20=E2=80=94=20STS-error=20fail-closed=20+=20empty-region?= =?UTF-8?q?=20normalization=20(closes=20#151)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two regression-guard tests covering the security paths PR #79 + follow-up commits f44b06567 and afc3aa1ff hardened. Both fixes landed without coverage; a future refactor could silently break either gate. Test 1 — TestResolveAWSCloudAccountID_FailsClosedOnSTSError Pins commit f44b06567's multi-tenant safety contract: when STS GetCallerIdentity fails (transient AWS error, denied perm, expired token), resolveAWSCloudAccountID propagates the error rather than returning ("", nil). The empty-with-no-error path would have purchaseRecLookupFromStore skip the AccountIDs filter and surface another tenant's recs — exactly the leak the production fix prevents. Mock injection: a failing http.RoundTripper installed on the handler's pre-sealed aws.Config makes STS GetCallerIdentity fail without hitting the network. Static dummy credentials + an explicit region let the SDK's signer compose the request before the failing transport runs (otherwise the SDK would error with MissingRegion / NoCredentialProviders before we reached the STS branch). The test asserts the wrapped error AND that ListCloudAccounts was never invoked — proving the resolver short-circuited before the DB read. Test 2 — TestGetReshapeRecommendations_EmptyRegionUsesConfigRegion Pins commit afc3aa1ff's region-normalization fix: when ?region= is empty (or omitted), the handler adopts cfg.Region (the value the AWS clients are actually talking to) before threading region through to the recs lookup. Without normalization the recs query lands as Region:"" — an unscoped read that surfaces alternatives from every region onto the reshape page. Mock injection mirrors the integration-test helpers but lives in the unit-test file (no //go:build integration tag). MockConfigStore captures every ListStoredRecommendations filter via mock.Run so the assertion can pin filter.Region == cfg.Region. A guard require.NotEmpty(capturedFilters) prevents a silent-pass failure mode if the analyzer ever stops invoking the closure for synthetic RIs. Would-fail-if-reverted verification (manual, before commit): - Commenting out the `if region == "" { region = cfg.Region }` block in handler_ri_exchange.go made Test 2 fail with the captured filter showing Region:"" instead of "us-west-2". - Changing the STS-error branch in exchange_lookup.go from `return "", fmt.Errorf(...)` back to `return "", nil` made Test 1 fail with "An error is expected but got nil". No production code modified — additive tests only. --- internal/api/exchange_lookup_test.go | 79 +++++++++++++ internal/api/handler_ri_exchange_test.go | 142 +++++++++++++++++++++++ 2 files changed, 221 insertions(+) diff --git a/internal/api/exchange_lookup_test.go b/internal/api/exchange_lookup_test.go index 5eedcb21..084db892 100644 --- a/internal/api/exchange_lookup_test.go +++ b/internal/api/exchange_lookup_test.go @@ -3,13 +3,26 @@ package api import ( "context" "errors" + "net/http" "testing" "github.com/LeanerCloud/CUDly/internal/config" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/credentials" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +// failingRoundTripper is an http.RoundTripper that fails every request +// with a fixed error. Used to inject an STS GetCallerIdentity failure +// into a Handler's pre-seeded aws.Config without dialling the network +// or relying on real AWS credentials. +type failingRoundTripper struct{ err error } + +func (f failingRoundTripper) RoundTrip(_ *http.Request) (*http.Response, error) { + return nil, f.err +} + // fakeRecsLister captures the filter passed to ListStoredRecommendations // so tests can assert region / account / provider scoping landed in the // SQL query. Returns a configurable result set or error. @@ -178,6 +191,72 @@ func TestPurchaseRecLookupFromStore_ThreeYearTerm(t *testing.T) { "3-year rec must serialise to 3 × 31_536_000s for the term-match guard") } +// TestResolveAWSCloudAccountID_FailsClosedOnSTSError pins the +// multi-tenant safety contract introduced by commit f44b06567: when +// STS GetCallerIdentity fails (transient AWS error, denied IAM perm, +// expired token), resolveAWSCloudAccountID MUST propagate the error +// rather than returning ("", nil) — the latter would have +// purchaseRecLookupFromStore skip the AccountIDs filter and serve up +// another tenant's recs to the caller. +// +// Mock injection strategy: pre-seal h.awsCfg with an aws.Config whose +// HTTPClient fails every request. CUDLY_SOURCE_CLOUD=aws forces +// resolveAWSCallerIdentity past its non-AWS short-circuit, so STS +// GetCallerIdentity is the next (and only) HTTP call — guaranteed +// to fail via the transport stub. Static credentials + an explicit +// region are required because the SDK signs the request before +// invoking the transport; without them the SDK errors with +// MissingRegion / NoCredentialProviders before the failing transport +// runs and the test would pass for the wrong reason. +// +// Asserts BOTH the error wraps the production fix's identifying +// prefix ("resolve source aws account for reshape scope") AND that +// ListCloudAccounts was never invoked — proving the fail-closed +// branch aborted the resolver before the DB read. Reverting the +// f44b06567 fix (changing the STS-error branch back to `return "", +// nil`) makes this test fail loudly. +func TestResolveAWSCloudAccountID_FailsClosedOnSTSError(t *testing.T) { + // Force resolveAWSCallerIdentity past its sourceCloud()!="aws" + // short-circuit so STS is actually invoked. + t.Setenv("CUDLY_SOURCE_CLOUD", "aws") + + // MockConfigStore with a ListCloudAccountsFn sentinel: if the + // resolver ever reaches the DB read, the flag flips to true and + // the test fails. The fail-closed branch must abort BEFORE this + // point. + var listCloudAccountsCalled bool + mockStore := &MockConfigStore{ + ListCloudAccountsFn: func(_ context.Context, _ config.CloudAccountFilter) ([]config.CloudAccount, error) { + listCloudAccountsCalled = true + return nil, nil + }, + } + + stsErr := errors.New("simulated sts get-caller-identity failure") + h := &Handler{config: mockStore} + // Pre-seal awsCfgOnce so resolveAWSCallerIdentity skips + // LoadDefaultConfig and uses our injected config directly. + h.awsCfgOnce.Do(func() { + h.awsCfg = aws.Config{ + Region: "us-east-1", + // Static dummy creds so the SDK's signer doesn't probe + // IMDS / shared-config files before sending the (doomed) + // HTTP request through the failing transport. + Credentials: credentials.NewStaticCredentialsProvider("AKIA-test", "test-secret", ""), + HTTPClient: &http.Client{Transport: failingRoundTripper{err: stsErr}}, + } + }) + + id, err := h.resolveAWSCloudAccountID(context.Background()) + require.Error(t, err, "STS failure must propagate — fail-closed contract for multi-tenant scope filter") + assert.Empty(t, id, "fail-closed sentinel: empty account ID with non-nil error") + assert.Contains(t, err.Error(), "resolve source aws account for reshape scope", + "error must wrap the resolver's production prefix so the cause stays attributable") + assert.False(t, listCloudAccountsCalled, + "ListCloudAccounts must NOT be invoked after an STS error — the resolver must short-circuit "+ + "before the DB read or the unscoped query path could leak another tenant's recs") +} + // TestSplitInstanceType pins the local instance-type parser used by // the mapping helper. Mirrors the pkg/exchange parser to avoid // exporting a general-purpose helper this package doesn't need. diff --git a/internal/api/handler_ri_exchange_test.go b/internal/api/handler_ri_exchange_test.go index 5d7b8feb..61c7d970 100644 --- a/internal/api/handler_ri_exchange_test.go +++ b/internal/api/handler_ri_exchange_test.go @@ -6,9 +6,13 @@ import ( "time" "github.com/LeanerCloud/CUDly/internal/config" + "github.com/LeanerCloud/CUDly/providers/aws/recommendations" + ec2svc "github.com/LeanerCloud/CUDly/providers/aws/services/ec2" "github.com/aws/aws-lambda-go/events" + "github.com/aws/aws-sdk-go-v2/aws" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" ) func TestListConvertibleRIs_RequiresAdmin(t *testing.T) { @@ -316,6 +320,144 @@ func TestRejectRIExchange_MissingToken(t *testing.T) { assert.Contains(t, err.Error(), "rejection token is required") } +// fakeReshapeEC2Stub is a unit-test stub for reshapeEC2Client. +// Returns a single convertible RI so the downstream +// AnalyzeReshapingWithRecs actually invokes purchaseRecLookupFromStore's +// closure — without an RI in the input slice the recs lookup is never +// called and the region-normalization assertion would never fire +// (silent test trap). Mirrors the integration-test fake but lives +// here so this unit test runs without the //go:build integration tag. +type fakeReshapeEC2Stub struct { + instances []ec2svc.ConvertibleRI +} + +func (f *fakeReshapeEC2Stub) ListConvertibleReservedInstances(_ context.Context) ([]ec2svc.ConvertibleRI, error) { + return f.instances, nil +} + +// fakeReshapeRecsStub returns a configurable RI utilization slice so +// the reshape pipeline runs without a real Cost Explorer dependency. +// AnalyzeReshapingWithRecs only emits a recommendation (and only then +// invokes the recs lookup closure) when an RI has a utilization entry +// BELOW the threshold — without one the analyzer returns nil and the +// closure never fires, producing a silent test. Seed with util-percent +// well under the request's threshold (95) to guarantee the closure +// runs. +type fakeReshapeRecsStub struct { + utilization []recommendations.RIUtilization +} + +func (f *fakeReshapeRecsStub) GetRIUtilization(_ context.Context, _ int) ([]recommendations.RIUtilization, error) { + return f.utilization, nil +} + +// TestGetReshapeRecommendations_EmptyRegionUsesConfigRegion pins the +// region-normalization fix from commit afc3aa1ff: when the caller +// omits ?region= (or sends ?region=), the handler MUST adopt +// cfg.Region (the value the AWS SDK clients are actually talking to) +// before threading the region through to the recs lookup. Without +// the normalization the recs query lands as Region:"" — an unscoped +// read that surfaces alternatives from every region onto the reshape +// page. +// +// Mock injection strategy: build a Handler with the same factory- +// injection seams the integration test uses (reshapeEC2Factory, +// reshapeRecsFactory, reshapeAccountResolver) so the request runs +// without real AWS calls or Postgres. Pre-seal awsCfgOnce with +// Region:"us-west-2" so loadAWSConfigWithRegion(ctx, "") returns a +// config carrying that region. Wire MockConfigStore.ListStoredRecommendations +// with mock.Run to capture every filter the closure passes — the +// load-bearing assertion is filter.Region == "us-west-2" (not ""). +// +// Reverting the `if region == "" { region = cfg.Region }` block from +// handler_ri_exchange.go makes this test fail loudly with the captured +// filter showing Region:"". +func TestGetReshapeRecommendations_EmptyRegionUsesConfigRegion(t *testing.T) { + const cfgRegion = "us-west-2" + + mockStore := &MockConfigStore{} + // mock.Run captures every invocation's arguments without affecting + // the return value. Returning nil/nil from ListStoredRecommendations + // means "no recs in this region" which the downstream pipeline + // treats as empty alternatives — fine for our purposes. + var capturedFilters []config.RecommendationFilter + mockStore.On("ListStoredRecommendations", mock.Anything, mock.Anything). + Return([]config.RecommendationRecord(nil), nil). + Run(func(args mock.Arguments) { + capturedFilters = append(capturedFilters, args.Get(1).(config.RecommendationFilter)) + }) + + h := &Handler{ + config: mockStore, + auth: &mockAuthForExchange{}, + // Inject one convertible RI so AnalyzeReshapingWithRecs + // actually drives purchaseRecLookupFromStore — otherwise the + // closure never fires and the assertion is silent. + reshapeEC2Factory: func(_ aws.Config) reshapeEC2Client { + return &fakeReshapeEC2Stub{ + instances: []ec2svc.ConvertibleRI{ + {ReservedInstanceID: "ri-1", InstanceType: "m5.xlarge", InstanceCount: 1, CurrencyCode: "USD"}, + }, + } + }, + reshapeRecsFactory: func(_ aws.Config) reshapeRecsClient { + // Util well below threshold (95) → analyzeRI emits a + // recommendation → AnalyzeReshapingWithRecs invokes the + // recs lookup closure → MockConfigStore.ListStoredRecommendations + // captures the filter the assertion below inspects. + return &fakeReshapeRecsStub{ + utilization: []recommendations.RIUtilization{ + {ReservedInstanceID: "ri-1", UtilizationPercent: 50.0}, + }, + } + }, + // Bypass STS so the test is hermetic. Empty cloud-account ID + // is the legitimate "no scope filter" path; this test isolates + // the region-normalization regression, not account scoping. + reshapeAccountResolver: func(_ context.Context) (string, error) { return "", nil }, + } + // Pre-seal awsCfgOnce with the cfg.Region the handler must adopt + // when the caller sends an empty ?region=. loadAWSConfigWithRegion + // returns this config unmodified (region override is skipped on + // empty input), so cfg.Region == cfgRegion downstream. + h.awsCfgOnce.Do(func() { + h.awsCfg = aws.Config{Region: cfgRegion} + }) + + req := &events.LambdaFunctionURLRequest{ + Headers: map[string]string{"authorization": "Bearer test-token"}, + QueryStringParameters: map[string]string{ + // Explicit empty value — the regression case from issue #151. + // Equivalent to omitting the key entirely (Go map zero-value). + "region": "", + "lookback_days": "30", + "threshold": "95.0", + }, + } + + _, err := h.getReshapeRecommendations(context.Background(), req) + require.NoError(t, err, "reshape handler must succeed when region is empty (cfg.Region used)") + + // Guard against a silent test: if AnalyzeReshapingWithRecs ever + // stops invoking the closure for synthetic RIs, the region check + // would vacuously pass. Require at least one captured filter. + require.NotEmpty(t, capturedFilters, + "recs lookup must be invoked at least once — without it the region-normalization "+ + "assertion would silently pass even if the fix were reverted") + + // Load-bearing assertion: every captured filter must carry the + // normalized cfg.Region value, NOT the empty string the request + // arrived with. If the `if region == "" { region = cfg.Region }` + // block in getReshapeRecommendations is removed, this fails. + for i, f := range capturedFilters { + assert.Equal(t, cfgRegion, f.Region, + "capturedFilters[%d].Region must equal cfg.Region (%q), not %q — empty ?region= "+ + "must normalize to cfg.Region or the recs lookup runs unscoped and leaks "+ + "alternatives from other regions onto the reshape page (commit afc3aa1ff)", + i, cfgRegion, f.Region) + } +} + // Suppress unused import warnings var _ = mock.Anything var _ = time.Now