From aca59388a249530bf70b49fe47395dc662e36fcf Mon Sep 17 00:00:00 2001 From: Cristian Magherusan-Stanciu Date: Sun, 3 May 2026 13:24:35 +0200 Subject: [PATCH] feat(azure compute): populate vCPU + MemoryGB via cached SKU catalogue (closes #148) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #97 added VCPU + MemoryGB fields to common.ComputeDetails (with omitempty JSON tags). PR #81 introduced the lazy SKU-catalogue pattern for cache/cosmosdb/database but explicitly scoped back compute because the destination fields didn't exist. Both prerequisites are now in place; this wires Azure compute to the same pattern. Implementation mirrors cache/database verbatim for consistency: - New unexported vmSKUEntry{vCPUs, memoryGB}. - New skuCacheOnce sync.Once + skuCacheMap field on ComputeClient. - cachedSKULookup(ctx, skuName) lazily triggers the catalogue fetch on first call; subsequent calls are O(1) map lookups. ok=false on miss or fetch error. - fetchSKUCatalogue reuses the existing createResourceSKUsPager and isAvailableInRegion helpers (already used by GetValidResourceTypes), walks every page, and reduces virtualMachines SKUs into the map. Returns nil on error so the sync.Once-gated cache stays nil and converters fall back to the empty-fields path with a one-time WARN. - extractVMSKUCapabilities pulls vCPUs (Atoi) and MemoryGB (ParseFloat) from the SKU's Capabilities name/value list. Unparseable or missing capabilities → 0, treated as "unknown". - populateVMSKUMapFromPage was extracted out of fetchSKUCatalogue to stay under the project's gocyclo=10 threshold (matches the cache/database extraction pattern from PR #81). - convertAzureVMRecommendation now takes ctx (was _) and populates Details.VCPU/MemoryGB from the cache when both >0. The single caller GetRecommendations already passes ctx; no public API change. Behaviour preserved on failure: a transient ResourceSKUsClient error no longer breaks the conversion — VCPU/MemoryGB stay at 0 (omitempty hides them from API payloads), the rest of Details is populated from the recommendation payload as before, and a WARN is logged once per client lifetime. UX improvement: common.ComputeDetails.GetDetailDescription appends " ( vCPU / GB)" when both fields are >0, so Azure VM recommendation summaries now include the size string instead of the SKU name alone. Tests added in providers/azure/services/compute/client_test.go (file-scoped vmSKUCatalogueMockPager keeps the shared mocks.MockResourceSKUsPager surface untouched, mirroring the cosmosdb / cache test convention): - TestComputeClient_ConvertAzureVMRecommendation_PopulatesVCPUAndMemoryFromSKUCache: catalogue hit populates VCPU=2, MemoryGB=8 for Standard_D2s_v3. - TestComputeClient_ConvertAzureVMRecommendation_PagerErrorFallsBack: fetch error leaves both fields at 0; conversion succeeds. - TestComputeClient_ConvertAzureVMRecommendation_NoMatchLeavesFieldsZero: catalogue miss leaves both fields at 0; conversion succeeds. - TestComputeClient_CachedSKULookup_FetchedOnce: 10 lookups trigger exactly 1 NextPage call (pins the N+1 invariant per PR #81). Out of scope (separate follow-ups, per the issue body): - AWS / GCP compute converter wiring. - Platform / Tenancy / Scope enrichment (require non-ResourceSKUs Azure data sources). Verification: gofmt clean; go vet ./... clean; gocyclo -over 10 finds no offenders in the modified file; go test github.com/LeanerCloud/CUDly/providers/azure/services/compute → 42 passing (4 new + all existing). The pre-existing providers/azure/services/search build failure (missing go.sum entry) is unrelated to this change. Closes #148 --- providers/azure/services/compute/client.go | 175 ++++++++++++++++-- .../azure/services/compute/client_test.go | 172 +++++++++++++++++ 2 files changed, 330 insertions(+), 17 deletions(-) diff --git a/providers/azure/services/compute/client.go b/providers/azure/services/compute/client.go index 3ca8d2ca..bc79c123 100644 --- a/providers/azure/services/compute/client.go +++ b/providers/azure/services/compute/client.go @@ -9,6 +9,7 @@ import ( "log" "net/http" "net/url" + "strconv" "strings" "sync" "time" @@ -20,6 +21,7 @@ import ( "github.com/google/uuid" "github.com/LeanerCloud/CUDly/pkg/common" + "github.com/LeanerCloud/CUDly/pkg/logging" "github.com/LeanerCloud/CUDly/providers/azure/internal/httpclient" "github.com/LeanerCloud/CUDly/providers/azure/internal/pricing" "github.com/LeanerCloud/CUDly/providers/azure/internal/recommendations" @@ -48,6 +50,20 @@ type HTTPClient interface { Do(req *http.Request) (*http.Response, error) } +// vmSKUEntry holds the SKU-catalogue-derived fields the converter +// wants for each VM SKU. Sourced from +// armcompute.ResourceSKU.Capabilities (a name/value-pair list): +// - vCPUs: Capabilities[Name=="vCPUs"].Value, parsed as int. +// - memoryGB: Capabilities[Name=="MemoryGB"].Value, parsed as float64. +// +// Either field stays at the zero value when the capability is missing +// or unparseable; common.ComputeDetails treats 0 as "unknown" (the +// JSON tags on VCPU/MemoryGB are omitempty). +type vmSKUEntry struct { + vCPUs int + memoryGB float64 +} + // ComputeClient handles Azure VM Reserved Instances type ComputeClient struct { cred azcore.TokenCredential @@ -63,6 +79,17 @@ type ComputeClient struct { // Microsoft.Capacity provider registration check (cached per client lifetime) capacityProviderOnce sync.Once capacityProviderErr error + + // Lazy SKU catalogue cache. armcompute.ResourceSKUsClient.NewListPager + // returns every SKU available to the subscription with its + // Capabilities (vCPUs, MemoryGB). Fetched ONCE per client lifetime; + // subsequent converter calls in the same GetRecommendations run hit + // the in-memory map. A failed fetch leaves skuCacheMap nil and + // converters fall back to VCPU=0/MemoryGB=0 with a WARN log — the + // conversion itself does NOT fail (graceful-degradation contract, + // matches cache/cosmosdb/database from PR #81). + skuCacheOnce sync.Once + skuCacheMap map[string]vmSKUEntry } // NewClient creates a new Azure Compute client @@ -672,21 +699,35 @@ func extractVMPricing(items []AzureRetailPriceItem, termYears int) (onDemand, re // providers/azure/internal/recommendations so the four Azure service // converters share the same type-assertion + nil-guard ladder. // -// Compute is NOT yet wired to the batched-SKU-catalogue lookup landed -// for cache/cosmosdb/database. Reason: common.ComputeDetails (in -// pkg/common/types.go) currently only exposes InstanceType, Platform, -// Tenancy, Scope. The two enrichments the SKU catalogue would supply -// for compute are vCPU and MemoryGB — neither of which has a field on -// ComputeDetails to write into. Adding new fields to a shared struct -// touches every cloud provider + every consumer (frontend, common -// matchers, three other providers' converters), which is well outside -// the scope of this perf change. Tracked as a follow-up — see -// known_issues/10_azure_provider.md. -func (c *ComputeClient) convertAzureVMRecommendation(_ context.Context, azureRec armconsumption.ReservationRecommendationClassification) *common.Recommendation { +// Details.VCPU and Details.MemoryGB are enriched from a lazily-cached +// armcompute.ResourceSKUsClient catalogue (cachedSKULookup). The +// catalogue is fetched ONCE per client lifetime; converter calls in +// the same GetRecommendations run share the in-memory map (the N+1 +// invariant pinned by TestComputeClient_CachedSKULookup_FetchedOnce). +// On catalogue-fetch failure or cache miss, both fields stay at 0 +// (the omitempty JSON tags hide them from API payloads) and the +// conversion still succeeds — matches the graceful-degradation +// contract from cache/cosmosdb/database in PR #81. +// +// Platform / Tenancy / Scope still require additional Azure data +// sources (consumption usage records, dedicated-host inventory) and +// remain unpopulated — out of scope for this issue. +func (c *ComputeClient) convertAzureVMRecommendation(ctx context.Context, azureRec armconsumption.ReservationRecommendationClassification) *common.Recommendation { f := recommendations.Extract(azureRec) if f == nil { return nil } + details := common.ComputeDetails{ + InstanceType: f.ResourceType, + } + if entry, ok := c.cachedSKULookup(ctx, f.ResourceType); ok { + if entry.vCPUs > 0 { + details.VCPU = entry.vCPUs + } + if entry.memoryGB > 0 { + details.MemoryGB = entry.memoryGB + } + } return &common.Recommendation{ Provider: common.ProviderAzure, Service: common.ServiceCompute, @@ -701,11 +742,111 @@ func (c *ComputeClient) convertAzureVMRecommendation(_ context.Context, azureRec Term: f.Term, PaymentOption: "upfront", Timestamp: time.Now(), - // Only InstanceType is safely derivable from the payload. Platform, - // Tenancy, Scope are armcompute.ResourceSKUsClient territory and - // are deferred — see the function godoc for the scope decision. - Details: common.ComputeDetails{ - InstanceType: f.ResourceType, - }, + Details: details, + } +} + +// cachedSKULookup returns the SKU catalogue entry for skuName, fetching +// the catalogue lazily on first call. The catalogue is fetched ONCE per +// client lifetime via armcompute.ResourceSKUsClient.NewListPager; +// subsequent calls are O(1) map lookups. ok=false on cache miss OR +// catalogue-fetch failure — the caller falls back to VCPU=0 / MemoryGB=0 +// rather than failing the whole conversion. +func (c *ComputeClient) cachedSKULookup(ctx context.Context, skuName string) (vmSKUEntry, bool) { + c.skuCacheOnce.Do(func() { + c.skuCacheMap = c.fetchSKUCatalogue(ctx) + }) + if c.skuCacheMap == nil { + return vmSKUEntry{}, false + } + entry, ok := c.skuCacheMap[skuName] + return entry, ok +} + +// fetchSKUCatalogue performs the single ResourceSKUsClient.NewListPager +// walk and reduces the response into a name->vmSKUEntry map keyed by +// SKU.Name (matches the recommendation engine's ResourceType output). +// +// Filters out non-VM resource types and SKUs not available in c.region +// (mirrors the existing extractVMSizeIfValid filter used by +// GetValidResourceTypes — a SKU listed for a different region is not +// safe to attribute to a recommendation in this client's region). +// +// Returns nil on pager-create or page-fetch error so the +// sync.Once-gated cache field stays nil and cachedSKULookup falls back +// to the empty-fields path. The fetch error is logged WARN once. +func (c *ComputeClient) fetchSKUCatalogue(ctx context.Context) map[string]vmSKUEntry { + pager, err := c.createResourceSKUsPager() + if err != nil { + logging.Warnf("azure compute: SKU catalogue pager create failed for region %s: %v — Details.VCPU/MemoryGB left at 0", c.region, err) + return nil + } + out := make(map[string]vmSKUEntry) + for pager.More() { + page, err := pager.NextPage(ctx) + if err != nil { + logging.Warnf("azure compute: SKU catalogue page fetch failed for region %s: %v — partial cache (%d entries) discarded, Details.VCPU/MemoryGB left at 0", c.region, err, len(out)) + return nil + } + c.populateVMSKUMapFromPage(out, page.Value) + } + return out +} + +// populateVMSKUMapFromPage writes one vmSKUEntry per qualifying VM SKU +// found in the page into out. Filters non-VM resource types and SKUs +// not available in c.region. First-write-wins on duplicate SKU names — +// ResourceSKUs returns each SKU once per subscription, but defending +// against duplicates keeps the cache deterministic regardless of pager +// order. Extracted out of fetchSKUCatalogue to keep that function +// under the cyclomatic-complexity threshold enforced by the +// pre-commit hook (matches the cache/database extraction pattern from +// PR #81). +func (c *ComputeClient) populateVMSKUMapFromPage(out map[string]vmSKUEntry, skus []*armcompute.ResourceSKU) { + for _, sku := range skus { + if sku == nil || sku.Name == nil { + continue + } + if sku.ResourceType == nil || *sku.ResourceType != "virtualMachines" { + continue + } + if !c.isAvailableInRegion(sku, c.region) { + continue + } + if _, exists := out[*sku.Name]; exists { + continue + } + vCPUs, memoryGB := extractVMSKUCapabilities(sku) + out[*sku.Name] = vmSKUEntry{vCPUs: vCPUs, memoryGB: memoryGB} + } +} + +// extractVMSKUCapabilities pulls the vCPUs and MemoryGB capabilities +// out of an armcompute.ResourceSKU's Capabilities name/value list. +// Returns (0, 0) when the capability is missing or unparseable — +// callers treat the zero value as "unknown" (omitempty JSON tag on +// common.ComputeDetails). +// +// Extracted out of fetchSKUCatalogue to keep that function under the +// cyclomatic-complexity threshold enforced by the pre-commit hook +// (matches the cache/database extraction pattern from PR #81). +func extractVMSKUCapabilities(sku *armcompute.ResourceSKU) (int, float64) { + var vCPUs int + var memoryGB float64 + for _, cb := range sku.Capabilities { + if cb == nil || cb.Name == nil || cb.Value == nil { + continue + } + switch *cb.Name { + case "vCPUs": + if v, err := strconv.Atoi(*cb.Value); err == nil { + vCPUs = v + } + case "MemoryGB": + if v, err := strconv.ParseFloat(*cb.Value, 64); err == nil { + memoryGB = v + } + } } + return vCPUs, memoryGB } diff --git a/providers/azure/services/compute/client_test.go b/providers/azure/services/compute/client_test.go index dae192ea..bc1936df 100644 --- a/providers/azure/services/compute/client_test.go +++ b/providers/azure/services/compute/client_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "net/http" + "strconv" "testing" "time" @@ -661,3 +662,174 @@ func TestBuildReservationBody_OmitsTagsWhenSourceEmpty(t *testing.T) { _, present := got["tags"] assert.False(t, present, "tags must be absent when source is empty") } + +// --- Issue #148: VCPU/MemoryGB enrichment via cached SKU catalogue --- + +// vmSKUCatalogueMockPager is a multi-page-and-error-capable mock used +// only by the issue-148 SKU-catalogue tests below. The shared +// mocks.MockResourceSKUsPager doesn't expose its page counter and +// can't simulate a NextPage error — file-scoped here keeps the shared +// mock surface untouched (matches the cosmosdb / cache test pattern +// where each service defines its own catalogue mock). +type vmSKUCatalogueMockPager struct { + pages []armcompute.ResourceSKUsClientListResponse + index int + err error + pageHits int // count of NextPage invocations — pinned by FetchedOnce +} + +func (m *vmSKUCatalogueMockPager) More() bool { + return m.index < len(m.pages) +} + +func (m *vmSKUCatalogueMockPager) NextPage(_ context.Context) (armcompute.ResourceSKUsClientListResponse, error) { + m.pageHits++ + if m.err != nil { + return armcompute.ResourceSKUsClientListResponse{}, m.err + } + if m.index >= len(m.pages) { + return armcompute.ResourceSKUsClientListResponse{}, errors.New("no more pages") + } + page := m.pages[m.index] + m.index++ + return page, nil +} + +// buildVMSKU constructs an armcompute.ResourceSKU for "virtualMachines" +// in the given region with the standard vCPUs / MemoryGB capabilities +// the converter parses out. +func buildVMSKU(name, region string, vCPUs int, memoryGB string) *armcompute.ResourceSKU { + resourceType := "virtualMachines" + regionStr := region + nameStr := name + vcpuName := "vCPUs" + vcpuVal := strconv.Itoa(vCPUs) + memName := "MemoryGB" + memVal := memoryGB + return &armcompute.ResourceSKU{ + Name: &nameStr, + ResourceType: &resourceType, + Locations: []*string{®ionStr}, + Capabilities: []*armcompute.ResourceSKUCapabilities{ + {Name: &vcpuName, Value: &vcpuVal}, + {Name: &memName, Value: &memVal}, + }, + } +} + +// TestComputeClient_ConvertAzureVMRecommendation_PopulatesVCPUAndMemoryFromSKUCache +// asserts the cached SKU-catalogue lookup populates ComputeDetails.VCPU +// and ComputeDetails.MemoryGB when the catalogue contains the SKU named +// in the recommendation. Pin for issue #148. +func TestComputeClient_ConvertAzureVMRecommendation_PopulatesVCPUAndMemoryFromSKUCache(t *testing.T) { + client := NewClient(nil, "test-subscription", "eastus") + + mockPager := &vmSKUCatalogueMockPager{ + pages: []armcompute.ResourceSKUsClientListResponse{ + { + ResourceSKUsResult: armcompute.ResourceSKUsResult{ + Value: []*armcompute.ResourceSKU{ + buildVMSKU("Standard_D2s_v3", "eastus", 2, "8"), + buildVMSKU("Standard_D4s_v3", "eastus", 4, "16"), + }, + }, + }, + }, + } + client.SetResourceSKUsPager(mockPager) + + rec := mocks.BuildLegacyReservationRecommendation( + mocks.WithRegion("eastus"), + mocks.WithNormalizedSize("Standard_D2s_v3"), + ) + out := client.convertAzureVMRecommendation(context.Background(), rec) + require.NotNil(t, out) + details, ok := out.Details.(common.ComputeDetails) + require.True(t, ok) + assert.Equal(t, "Standard_D2s_v3", details.InstanceType) + assert.Equal(t, 2, details.VCPU, "VCPU must be enriched from the cached SKU catalogue") + assert.Equal(t, 8.0, details.MemoryGB, "MemoryGB must be enriched from the cached SKU catalogue") +} + +// TestComputeClient_ConvertAzureVMRecommendation_PagerErrorFallsBack +// asserts that a SKU-catalogue fetch failure does NOT fail the +// conversion — VCPU/MemoryGB stay at 0 and the rest of Details is +// populated from the recommendation payload. Graceful-degradation +// contract from PR #81, now extended to compute. +func TestComputeClient_ConvertAzureVMRecommendation_PagerErrorFallsBack(t *testing.T) { + client := NewClient(nil, "test-subscription", "eastus") + mockPager := &vmSKUCatalogueMockPager{ + pages: []armcompute.ResourceSKUsClientListResponse{{}}, + err: errors.New("transient Azure API error"), + } + client.SetResourceSKUsPager(mockPager) + + rec := mocks.BuildLegacyReservationRecommendation( + mocks.WithRegion("eastus"), + mocks.WithNormalizedSize("Standard_D2s_v3"), + ) + out := client.convertAzureVMRecommendation(context.Background(), rec) + require.NotNil(t, out, "conversion must NOT fail on catalogue-fetch error") + details, ok := out.Details.(common.ComputeDetails) + require.True(t, ok) + assert.Equal(t, "Standard_D2s_v3", details.InstanceType) + assert.Equal(t, 0, details.VCPU, "VCPU left at 0 when catalogue fetch fails") + assert.Equal(t, 0.0, details.MemoryGB, "MemoryGB left at 0 when catalogue fetch fails") +} + +// TestComputeClient_ConvertAzureVMRecommendation_NoMatchLeavesFieldsZero +// asserts that when the recommendation's SKU isn't in the catalogue +// (e.g. SKU listed for another region only), VCPU/MemoryGB stay at 0 +// and the conversion still produces a usable recommendation. +func TestComputeClient_ConvertAzureVMRecommendation_NoMatchLeavesFieldsZero(t *testing.T) { + client := NewClient(nil, "test-subscription", "eastus") + mockPager := &vmSKUCatalogueMockPager{ + pages: []armcompute.ResourceSKUsClientListResponse{ + { + ResourceSKUsResult: armcompute.ResourceSKUsResult{ + Value: []*armcompute.ResourceSKU{ + buildVMSKU("Standard_D2s_v3", "eastus", 2, "8"), + }, + }, + }, + }, + } + client.SetResourceSKUsPager(mockPager) + + rec := mocks.BuildLegacyReservationRecommendation( + mocks.WithRegion("eastus"), + mocks.WithNormalizedSize("Standard_NC6s_v3"), // not in catalogue + ) + out := client.convertAzureVMRecommendation(context.Background(), rec) + require.NotNil(t, out) + details, ok := out.Details.(common.ComputeDetails) + require.True(t, ok) + assert.Equal(t, "Standard_NC6s_v3", details.InstanceType) + assert.Equal(t, 0, details.VCPU) + assert.Equal(t, 0.0, details.MemoryGB) +} + +// TestComputeClient_CachedSKULookup_FetchedOnce pins the perf +// invariant from PR #81 (now also enforced for compute, per #148): +// many converter calls in the same GetRecommendations run trigger +// exactly ONE catalogue fetch. +func TestComputeClient_CachedSKULookup_FetchedOnce(t *testing.T) { + client := NewClient(nil, "test-subscription", "eastus") + mockPager := &vmSKUCatalogueMockPager{ + pages: []armcompute.ResourceSKUsClientListResponse{ + { + ResourceSKUsResult: armcompute.ResourceSKUsResult{ + Value: []*armcompute.ResourceSKU{ + buildVMSKU("Standard_D2s_v3", "eastus", 2, "8"), + }, + }, + }, + }, + } + client.SetResourceSKUsPager(mockPager) + + for i := 0; i < 10; i++ { + _, _ = client.cachedSKULookup(context.Background(), "Standard_D2s_v3") + } + assert.Equal(t, 1, mockPager.pageHits, "catalogue must be fetched ONCE regardless of lookup count") +}