From e72f45de776309df8ff9be82e33b59b700a1a8c7 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Mon, 13 Apr 2026 08:28:28 +0200 Subject: [PATCH 01/10] Add VMware commitments KPI --- helm/bundles/cortex-nova/templates/kpis.yaml | 13 ++ .../compute/resource_commitments_vmware.go | 150 +++++++++++++++++ .../resource_commitments_vmware_test.go | 153 ++++++++++++++++++ internal/knowledge/kpis/supported_kpis.go | 1 + 4 files changed, 317 insertions(+) create mode 100644 internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go create mode 100644 internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go diff --git a/helm/bundles/cortex-nova/templates/kpis.yaml b/helm/bundles/cortex-nova/templates/kpis.yaml index bc5666926..eea5d64d3 100644 --- a/helm/bundles/cortex-nova/templates/kpis.yaml +++ b/helm/bundles/cortex-nova/templates/kpis.yaml @@ -184,3 +184,16 @@ spec: pipelineSchedulingDomain: nova description: | This KPI tracks the state of pipeline resources managed by cortex. +--- +apiVersion: cortex.cloud/v1alpha1 +kind: KPI +metadata: + name: vmware-commitments +spec: + schedulingDomain: nova + impl: vmware_commitments_kpi + dependencies: + datasources: + - name: limes-project-commitments + description: | + This KPI tracks unused VMware commitments based on project commitments and usage. \ No newline at end of file diff --git a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go new file mode 100644 index 000000000..485834370 --- /dev/null +++ b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go @@ -0,0 +1,150 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package compute + +import ( + "log/slog" + "strings" + + "github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/limes" + "github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/nova" + "github.com/cobaltcore-dev/cortex/internal/knowledge/kpis/plugins" + "github.com/cobaltcore-dev/cortex/internal/knowledge/db" + "github.com/cobaltcore-dev/cortex/pkg/conf" + "github.com/prometheus/client_golang/prometheus" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type VMwareResourceCommitmentsKPI struct { + // Common base for all KPIs that provides standard functionality. + plugins.BaseKPI[struct{}] // No options passed through yaml config + + unusedInstanceCommitments *prometheus.Desc +} + +func (VMwareResourceCommitmentsKPI) GetName() string { + return "vmware_hana_commitments_kpi" +} + +func (k *VMwareResourceCommitmentsKPI) Init(db *db.DB, client client.Client, opts conf.RawOpts) error { + if err := k.BaseKPI.Init(db, client, opts); err != nil { + return err + } + k.unusedInstanceCommitments = prometheus.NewDesc( + "cortex_vmware_hana_unused_instance_commitments", + "Unused instance commitment capacity summed across all projects (vcpus / ram_mb / disk_gb).", + []string{ + "resource", // "cpu", "ram", "disk" + "availability_zone", + "cpu_architecture", // "sapphire-rapids" (_v2 suffix) or "cascade-lake" + }, + nil, + ) + return nil +} + +func (k *VMwareResourceCommitmentsKPI) Describe(ch chan<- *prometheus.Desc) { + ch <- k.unusedInstanceCommitments +} + +func (k *VMwareResourceCommitmentsKPI) Collect(ch chan<- prometheus.Metric) { + k.collectUnusedCommitments(ch) +} + +func (k *VMwareResourceCommitmentsKPI) collectUnusedCommitments(ch chan<- prometheus.Metric) { + if k.DB == nil { + return + } + // Load confirmed/guaranteed instance commitments. + var commitments []limes.Commitment + if _, err := k.DB.Select(&commitments, ` + SELECT * FROM `+limes.Commitment{}.TableName()+` + WHERE service_type = 'compute' + AND resource_name LIKE 'instances_%' + AND status IN ('confirmed', 'guaranteed') + `); err != nil { + slog.Error("unused_commitments: failed to load commitments", "err", err) + return + } + + // Load flavors for capacity lookup. + var flavors []nova.Flavor + if _, err := k.DB.Select(&flavors, "SELECT * FROM "+nova.Flavor{}.TableName()); err != nil { + slog.Error("unused_commitments: failed to load flavors", "err", err) + return + } + flavorsByName := make(map[string]nova.Flavor, len(flavors)) + for _, flavor := range flavors { + flavorsByName[flavor.Name] = flavor + } + + // Load running HANA servers (non-deleted, non-error). + var servers []nova.Server + if _, err := k.DB.Select(&servers, ` + SELECT * FROM `+nova.Server{}.TableName()+` + WHERE flavor_name LIKE 'hana_%' + AND status NOT IN ('DELETED', 'ERROR') + `); err != nil { + slog.Error("unused_commitments: failed to load servers", "err", err) + return + } + // runningCount: (project_id, flavor_name, az) -> count + type serverKey struct{ projectID, flavorName, az string } + runningCount := make(map[serverKey]int64) + for _, server := range servers { + key := serverKey{server.TenantID, server.FlavorName, server.OSEXTAvailabilityZone} + runningCount[key]++ + } + + // committed: (project_id, flavor_name, az, cpuArchitecture) -> total committed amount + type commitKey struct{ projectID, flavorName, az, cpuArchitecture string } + committed := make(map[commitKey]int64) + for _, c := range commitments { + flavorName := strings.TrimPrefix(c.ResourceName, "instances_") + if !strings.HasPrefix(flavorName, "hana_") { + continue + } + if strings.HasPrefix(flavorName, "hana_k_") { + slog.Info("unused_commitments: skipping hana kvm commitment", "flavor", flavorName, "project_id", c.ProjectID) + continue + } + cpuArchitecture := "cascade-lake" + if strings.HasSuffix(flavorName, "_v2") { + cpuArchitecture = "sapphire-rapids" + } + key := commitKey{c.ProjectID, flavorName, c.AvailabilityZone, cpuArchitecture} + committed[key] += int64(c.Amount) + } + + // For each (project, flavor, az, arch): unused = max(0, committed - running). + // Accumulate capacity into sumByResource: (resource, az, arch) -> value. + type resourceKey struct{ resource, az, arch string } + sumByResource := make(map[resourceKey]float64) + for ck, total := range committed { + sk := serverKey{ck.projectID, ck.flavorName, ck.az} + unused := total - runningCount[sk] + if unused <= 0 { + continue + } + flavor, ok := flavorsByName[ck.flavorName] + if !ok { + slog.Warn("unused_commitments: flavor not found in flavor table", "flavor", ck.flavorName) + continue + } + sumByResource[resourceKey{"cpu", ck.az, ck.cpuArchitecture}] += float64(unused) * float64(flavor.VCPUs) + sumByResource[resourceKey{"ram", ck.az, ck.cpuArchitecture}] += float64(unused) * float64(flavor.RAM) + sumByResource[resourceKey{"disk", ck.az, ck.cpuArchitecture}] += float64(unused) * float64(flavor.Disk) + } + + for rk, value := range sumByResource { + ch <- prometheus.MustNewConstMetric( + k.unusedInstanceCommitments, + prometheus.GaugeValue, + value, + rk.resource, + rk.az, + rk.arch, + ) + } +} diff --git a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go new file mode 100644 index 000000000..a6acb4dc0 --- /dev/null +++ b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go @@ -0,0 +1,153 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package compute + +import ( + "reflect" + "testing" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/limes" + "github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/nova" + "github.com/cobaltcore-dev/cortex/internal/knowledge/db" + testlibDB "github.com/cobaltcore-dev/cortex/internal/knowledge/db/testing" + "github.com/cobaltcore-dev/cortex/pkg/conf" + "github.com/prometheus/client_golang/prometheus" + prometheusgo "github.com/prometheus/client_model/go" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestVMwareResourceCommitmentsKPI_CollectHanaUnusedCommitments(t *testing.T) { + scheme, err := v1alpha1.SchemeBuilder.Build() + if err != nil { + t.Fatalf("expected no error building scheme, got %v", err) + } + + dbEnv := testlibDB.SetupDBEnv(t) + testDB := db.DB{DbMap: dbEnv.DbMap} + defer dbEnv.Close() + + if err := testDB.CreateTable( + testDB.AddTable(limes.Commitment{}), + testDB.AddTable(nova.Flavor{}), + testDB.AddTable(nova.Server{}), + ); err != nil { + t.Fatalf("expected no error creating tables, got %v", err) + } + + // Flavors: hana_small (4 vcpu, 16384 MB ram, 100 GB disk) + // hana_large_v2 (16 vcpu, 65536 MB ram, 400 GB disk) + if err := testDB.Insert( + &nova.Flavor{ID: "f1", Name: "hana_small", VCPUs: 4, RAM: 16384, Disk: 100}, + &nova.Flavor{ID: "f2", Name: "hana_large_v2", VCPUs: 16, RAM: 65536, Disk: 400}, + &nova.Flavor{ID: "f3", Name: "general_medium", VCPUs: 8, RAM: 32768, Disk: 200}, + ); err != nil { + t.Fatalf("expected no error inserting flavors, got %v", err) + } + + // Commitments across two AZs to verify per-AZ aggregation: + // project-A: 3 x hana_small in az1 (cascade-lake) + // project-B: 2 x hana_large_v2 in az1 (sapphire-rapids) + // project-A: 4 x hana_small in az2 (cascade-lake) — separate AZ bucket + // project-C: 1 x hana_k_foo in az1 — hana_k_ prefix, should be skipped + // project-D: 1 x general_medium — not hana_, should be skipped + // project-A: 10 x hana_small pending — should be excluded (wrong status) + if err := testDB.Insert( + &limes.Commitment{ID: 1, ServiceType: "compute", ResourceName: "instances_hana_small", AvailabilityZone: "az1", Amount: 3, Status: "confirmed", ProjectID: "project-A"}, + &limes.Commitment{ID: 2, ServiceType: "compute", ResourceName: "instances_hana_large_v2", AvailabilityZone: "az1", Amount: 2, Status: "confirmed", ProjectID: "project-B"}, + &limes.Commitment{ID: 3, ServiceType: "compute", ResourceName: "instances_hana_small", AvailabilityZone: "az2", Amount: 4, Status: "guaranteed", ProjectID: "project-A"}, + &limes.Commitment{ID: 4, ServiceType: "compute", ResourceName: "instances_hana_k_foo", AvailabilityZone: "az1", Amount: 5, Status: "confirmed", ProjectID: "project-C"}, + &limes.Commitment{ID: 5, ServiceType: "compute", ResourceName: "instances_general_medium", AvailabilityZone: "az1", Amount: 1, Status: "confirmed", ProjectID: "project-D"}, + &limes.Commitment{ID: 6, ServiceType: "compute", ResourceName: "instances_hana_small", AvailabilityZone: "az1", Amount: 10, Status: "pending", ProjectID: "project-A"}, + ); err != nil { + t.Fatalf("expected no error inserting commitments, got %v", err) + } + + // Running servers: + // project-A/az1: 1 hana_small ACTIVE, 1 DELETED (ignored) → 2 unused in az1 + // project-B/az1: 0 hana_large_v2 → 2 unused in az1 + // project-A/az2: 1 hana_small ACTIVE → 3 unused in az2 + if err := testDB.Insert( + &nova.Server{ID: "s1", TenantID: "project-A", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1", Status: "ACTIVE"}, + &nova.Server{ID: "s2", TenantID: "project-A", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1", Status: "DELETED"}, + &nova.Server{ID: "s3", TenantID: "project-A", FlavorName: "hana_small", OSEXTAvailabilityZone: "az2", Status: "ACTIVE"}, + ); err != nil { + t.Fatalf("expected no error inserting servers, got %v", err) + } + + k8sClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects( + &v1alpha1.Knowledge{ObjectMeta: v1.ObjectMeta{Name: "host-details"}}, + ). + Build() + + kpi := &VMwareResourceCommitmentsKPI{} + if err := kpi.Init(&testDB, k8sClient, conf.NewRawOpts("{}")); err != nil { + t.Fatalf("expected no error, got %v", err) + } + + ch := make(chan prometheus.Metric, 100) + kpi.Collect(ch) + close(ch) + + type UnusedMetric struct { + Resource string + AZ string + Arch string + Value float64 + } + + actual := make(map[string]UnusedMetric) + for metric := range ch { + if getMetricName(metric.Desc().String()) != "cortex_vmware_hana_unused_instance_commitments" { + continue + } + var m prometheusgo.Metric + if err := metric.Write(&m); err != nil { + t.Fatalf("failed to write metric: %v", err) + } + labels := make(map[string]string) + for _, lbl := range m.Label { + labels[lbl.GetName()] = lbl.GetValue() + } + key := labels["resource"] + "/" + labels["availability_zone"] + "/" + labels["cpu_architecture"] + actual[key] = UnusedMetric{ + Resource: labels["resource"], + AZ: labels["availability_zone"], + Arch: labels["cpu_architecture"], + Value: m.GetGauge().GetValue(), + } + } + + // project-A/az1: 2 unused hana_small (cascade-lake) → cpu=2×4=8, ram=2×16384=32768, disk=2×100=200 + // project-B/az1: 2 unused hana_large_v2 (sapphire-rapids) → cpu=2×16=32, ram=2×65536=131072, disk=2×400=800 + // project-A/az2: 3 unused hana_small (cascade-lake) → cpu=3×4=12, ram=3×16384=49152, disk=3×100=300 + expected := map[string]UnusedMetric{ + "cpu/az1/cascade-lake": {Resource: "cpu", AZ: "az1", Arch: "cascade-lake", Value: 8}, + "ram/az1/cascade-lake": {Resource: "ram", AZ: "az1", Arch: "cascade-lake", Value: 32768}, + "disk/az1/cascade-lake": {Resource: "disk", AZ: "az1", Arch: "cascade-lake", Value: 200}, + "cpu/az1/sapphire-rapids": {Resource: "cpu", AZ: "az1", Arch: "sapphire-rapids", Value: 32}, + "ram/az1/sapphire-rapids": {Resource: "ram", AZ: "az1", Arch: "sapphire-rapids", Value: 131072}, + "disk/az1/sapphire-rapids": {Resource: "disk", AZ: "az1", Arch: "sapphire-rapids", Value: 800}, + "cpu/az2/cascade-lake": {Resource: "cpu", AZ: "az2", Arch: "cascade-lake", Value: 12}, + "ram/az2/cascade-lake": {Resource: "ram", AZ: "az2", Arch: "cascade-lake", Value: 49152}, + "disk/az2/cascade-lake": {Resource: "disk", AZ: "az2", Arch: "cascade-lake", Value: 300}, + } + + if len(actual) != len(expected) { + t.Errorf("expected %d metrics, got %d: %v", len(expected), len(actual), actual) + } + for key, exp := range expected { + got, ok := actual[key] + if !ok { + t.Errorf("missing metric %q", key) + continue + } + if !reflect.DeepEqual(exp, got) { + t.Errorf("metric %q: expected %+v, got %+v", key, exp, got) + } + } +} diff --git a/internal/knowledge/kpis/supported_kpis.go b/internal/knowledge/kpis/supported_kpis.go index 2623ff8bd..a812943e1 100644 --- a/internal/knowledge/kpis/supported_kpis.go +++ b/internal/knowledge/kpis/supported_kpis.go @@ -22,6 +22,7 @@ var supportedKPIs = map[string]plugins.KPI{ "vm_life_span_kpi": &compute.VMLifeSpanKPI{}, "vm_commitments_kpi": &compute.VMCommitmentsKPI{}, "vm_faults_kpi": &compute.VMFaultsKPI{}, + "vmware_commitments_kpi": &compute.VMwareResourceCommitmentsKPI{}, "netapp_storage_pool_cpu_usage_kpi": &storage.NetAppStoragePoolCPUUsageKPI{}, From fa527f5d5d05f338f02e3389449aa1f0d06a7911 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Mon, 13 Apr 2026 09:01:08 +0200 Subject: [PATCH 02/10] Add dependencies for nova-flavors and nova-servers to unused VMware commitments KPI --- helm/bundles/cortex-nova/templates/kpis.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/helm/bundles/cortex-nova/templates/kpis.yaml b/helm/bundles/cortex-nova/templates/kpis.yaml index eea5d64d3..a84989b7b 100644 --- a/helm/bundles/cortex-nova/templates/kpis.yaml +++ b/helm/bundles/cortex-nova/templates/kpis.yaml @@ -195,5 +195,7 @@ spec: dependencies: datasources: - name: limes-project-commitments + - name: nova-flavors + - name: nova-servers description: | This KPI tracks unused VMware commitments based on project commitments and usage. \ No newline at end of file From b26d35dcd234d55ba204ef2a770d2d9e8dcc5cf5 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Mon, 13 Apr 2026 09:01:49 +0200 Subject: [PATCH 03/10] Refactor VMware commitments KPI naming and import order --- .../kpis/plugins/compute/resource_commitments_vmware.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go index 485834370..a464a286d 100644 --- a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go +++ b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go @@ -9,8 +9,8 @@ import ( "github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/limes" "github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/nova" - "github.com/cobaltcore-dev/cortex/internal/knowledge/kpis/plugins" "github.com/cobaltcore-dev/cortex/internal/knowledge/db" + "github.com/cobaltcore-dev/cortex/internal/knowledge/kpis/plugins" "github.com/cobaltcore-dev/cortex/pkg/conf" "github.com/prometheus/client_golang/prometheus" "sigs.k8s.io/controller-runtime/pkg/client" @@ -24,7 +24,7 @@ type VMwareResourceCommitmentsKPI struct { } func (VMwareResourceCommitmentsKPI) GetName() string { - return "vmware_hana_commitments_kpi" + return "vmware_commitments_kpi" } func (k *VMwareResourceCommitmentsKPI) Init(db *db.DB, client client.Client, opts conf.RawOpts) error { @@ -35,7 +35,7 @@ func (k *VMwareResourceCommitmentsKPI) Init(db *db.DB, client client.Client, opt "cortex_vmware_hana_unused_instance_commitments", "Unused instance commitment capacity summed across all projects (vcpus / ram_mb / disk_gb).", []string{ - "resource", // "cpu", "ram", "disk" + "resource", // "cpu", "ram", "disk" "availability_zone", "cpu_architecture", // "sapphire-rapids" (_v2 suffix) or "cascade-lake" }, From d098470e664dd521a55c06f523a8a8f908e573b0 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Mon, 13 Apr 2026 10:10:52 +0200 Subject: [PATCH 04/10] Change runningCount and committed maps to use uint64 for improved capacity handling --- .../kpis/plugins/compute/resource_commitments_vmware.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go index a464a286d..69a3d140a 100644 --- a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go +++ b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go @@ -91,7 +91,7 @@ func (k *VMwareResourceCommitmentsKPI) collectUnusedCommitments(ch chan<- promet } // runningCount: (project_id, flavor_name, az) -> count type serverKey struct{ projectID, flavorName, az string } - runningCount := make(map[serverKey]int64) + runningCount := make(map[serverKey]uint64) for _, server := range servers { key := serverKey{server.TenantID, server.FlavorName, server.OSEXTAvailabilityZone} runningCount[key]++ @@ -99,7 +99,7 @@ func (k *VMwareResourceCommitmentsKPI) collectUnusedCommitments(ch chan<- promet // committed: (project_id, flavor_name, az, cpuArchitecture) -> total committed amount type commitKey struct{ projectID, flavorName, az, cpuArchitecture string } - committed := make(map[commitKey]int64) + committed := make(map[commitKey]uint64) for _, c := range commitments { flavorName := strings.TrimPrefix(c.ResourceName, "instances_") if !strings.HasPrefix(flavorName, "hana_") { @@ -114,7 +114,7 @@ func (k *VMwareResourceCommitmentsKPI) collectUnusedCommitments(ch chan<- promet cpuArchitecture = "sapphire-rapids" } key := commitKey{c.ProjectID, flavorName, c.AvailabilityZone, cpuArchitecture} - committed[key] += int64(c.Amount) + committed[key] += c.Amount } // For each (project, flavor, az, arch): unused = max(0, committed - running). From c285a07269efa0172543adb336b2141c65706c31 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Tue, 14 Apr 2026 09:29:39 +0200 Subject: [PATCH 05/10] Fix integer overflow --- .../kpis/plugins/compute/resource_commitments_vmware.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go index 69a3d140a..0de63a005 100644 --- a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go +++ b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go @@ -123,10 +123,12 @@ func (k *VMwareResourceCommitmentsKPI) collectUnusedCommitments(ch chan<- promet sumByResource := make(map[resourceKey]float64) for ck, total := range committed { sk := serverKey{ck.projectID, ck.flavorName, ck.az} - unused := total - runningCount[sk] - if unused <= 0 { + running := runningCount[sk] + + if running >= total { continue } + unused := total - running flavor, ok := flavorsByName[ck.flavorName] if !ok { slog.Warn("unused_commitments: flavor not found in flavor table", "flavor", ck.flavorName) From 4fc3bed20162e6d2df6d7ecb12105b9cd532a4da Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Tue, 14 Apr 2026 09:39:27 +0200 Subject: [PATCH 06/10] Change log level from Info to Debug for skipping HANA KVM commitments --- .../kpis/plugins/compute/resource_commitments_vmware.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go index 0de63a005..1b96b3f2f 100644 --- a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go +++ b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go @@ -106,7 +106,7 @@ func (k *VMwareResourceCommitmentsKPI) collectUnusedCommitments(ch chan<- promet continue } if strings.HasPrefix(flavorName, "hana_k_") { - slog.Info("unused_commitments: skipping hana kvm commitment", "flavor", flavorName, "project_id", c.ProjectID) + slog.Debug("unused_commitments: skipping hana kvm commitment", "flavor", flavorName, "project_id", c.ProjectID) continue } cpuArchitecture := "cascade-lake" From ab46ec24f515e6fecf4b36bc20901fbfe9de1056 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Tue, 14 Apr 2026 15:01:06 +0200 Subject: [PATCH 07/10] Add test cases for over-used and fully used HANA commitments in VMware KPI --- .../resource_commitments_vmware_test.go | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go index a6acb4dc0..eff29166f 100644 --- a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go +++ b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go @@ -54,6 +54,8 @@ func TestVMwareResourceCommitmentsKPI_CollectHanaUnusedCommitments(t *testing.T) // project-C: 1 x hana_k_foo in az1 — hana_k_ prefix, should be skipped // project-D: 1 x general_medium — not hana_, should be skipped // project-A: 10 x hana_small pending — should be excluded (wrong status) + // project-E: 2 x hana_small in az1 — running will exceed this (over-used, no metric) + // project-F: 3 x hana_large_v2 in az2 — running exactly equals this (fully used, no metric) if err := testDB.Insert( &limes.Commitment{ID: 1, ServiceType: "compute", ResourceName: "instances_hana_small", AvailabilityZone: "az1", Amount: 3, Status: "confirmed", ProjectID: "project-A"}, &limes.Commitment{ID: 2, ServiceType: "compute", ResourceName: "instances_hana_large_v2", AvailabilityZone: "az1", Amount: 2, Status: "confirmed", ProjectID: "project-B"}, @@ -61,6 +63,8 @@ func TestVMwareResourceCommitmentsKPI_CollectHanaUnusedCommitments(t *testing.T) &limes.Commitment{ID: 4, ServiceType: "compute", ResourceName: "instances_hana_k_foo", AvailabilityZone: "az1", Amount: 5, Status: "confirmed", ProjectID: "project-C"}, &limes.Commitment{ID: 5, ServiceType: "compute", ResourceName: "instances_general_medium", AvailabilityZone: "az1", Amount: 1, Status: "confirmed", ProjectID: "project-D"}, &limes.Commitment{ID: 6, ServiceType: "compute", ResourceName: "instances_hana_small", AvailabilityZone: "az1", Amount: 10, Status: "pending", ProjectID: "project-A"}, + &limes.Commitment{ID: 7, ServiceType: "compute", ResourceName: "instances_hana_small", AvailabilityZone: "az1", Amount: 2, Status: "confirmed", ProjectID: "project-E"}, + &limes.Commitment{ID: 8, ServiceType: "compute", ResourceName: "instances_hana_large_v2", AvailabilityZone: "az2", Amount: 3, Status: "confirmed", ProjectID: "project-F"}, ); err != nil { t.Fatalf("expected no error inserting commitments, got %v", err) } @@ -69,10 +73,20 @@ func TestVMwareResourceCommitmentsKPI_CollectHanaUnusedCommitments(t *testing.T) // project-A/az1: 1 hana_small ACTIVE, 1 DELETED (ignored) → 2 unused in az1 // project-B/az1: 0 hana_large_v2 → 2 unused in az1 // project-A/az2: 1 hana_small ACTIVE → 3 unused in az2 + // project-E/az1: 5 hana_small ACTIVE → 5 > 2 committed → 0 unused (over-used, clamped) + // project-F/az2: 3 hana_large_v2 ACTIVE → 3 == 3 committed → 0 unused (fully used, clamped) if err := testDB.Insert( &nova.Server{ID: "s1", TenantID: "project-A", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1", Status: "ACTIVE"}, &nova.Server{ID: "s2", TenantID: "project-A", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1", Status: "DELETED"}, &nova.Server{ID: "s3", TenantID: "project-A", FlavorName: "hana_small", OSEXTAvailabilityZone: "az2", Status: "ACTIVE"}, + &nova.Server{ID: "s4", TenantID: "project-E", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1", Status: "ACTIVE"}, + &nova.Server{ID: "s5", TenantID: "project-E", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1", Status: "ACTIVE"}, + &nova.Server{ID: "s6", TenantID: "project-E", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1", Status: "ACTIVE"}, + &nova.Server{ID: "s7", TenantID: "project-E", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1", Status: "ACTIVE"}, + &nova.Server{ID: "s8", TenantID: "project-E", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1", Status: "ACTIVE"}, + &nova.Server{ID: "s9", TenantID: "project-F", FlavorName: "hana_large_v2", OSEXTAvailabilityZone: "az2", Status: "ACTIVE"}, + &nova.Server{ID: "s10", TenantID: "project-F", FlavorName: "hana_large_v2", OSEXTAvailabilityZone: "az2", Status: "ACTIVE"}, + &nova.Server{ID: "s11", TenantID: "project-F", FlavorName: "hana_large_v2", OSEXTAvailabilityZone: "az2", Status: "ACTIVE"}, ); err != nil { t.Fatalf("expected no error inserting servers, got %v", err) } @@ -122,9 +136,11 @@ func TestVMwareResourceCommitmentsKPI_CollectHanaUnusedCommitments(t *testing.T) } } - // project-A/az1: 2 unused hana_small (cascade-lake) → cpu=2×4=8, ram=2×16384=32768, disk=2×100=200 - // project-B/az1: 2 unused hana_large_v2 (sapphire-rapids) → cpu=2×16=32, ram=2×65536=131072, disk=2×400=800 - // project-A/az2: 3 unused hana_small (cascade-lake) → cpu=3×4=12, ram=3×16384=49152, disk=3×100=300 + // project-A/az1: 2 unused hana_small (cascade-lake) → cpu=2×4=8, ram=2×16384=32768, disk=2×100=200 + // project-B/az1: 2 unused hana_large_v2 (sapphire-rapids) → cpu=2×16=32, ram=2×65536=131072, disk=2×400=800 + // project-A/az2: 3 unused hana_small (cascade-lake) → cpu=3×4=12, ram=3×16384=49152, disk=3×100=300 + // project-E/az1: 5 running > 2 committed hana_small → clamped to 0, no metric emitted + // project-F/az2: 3 running == 3 committed hana_large_v2 → clamped to 0, no metric emitted expected := map[string]UnusedMetric{ "cpu/az1/cascade-lake": {Resource: "cpu", AZ: "az1", Arch: "cascade-lake", Value: 8}, "ram/az1/cascade-lake": {Resource: "ram", AZ: "az1", Arch: "cascade-lake", Value: 32768}, From 3df7e491bea1ff324319f6d05dc6367bb5349909 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Tue, 14 Apr 2026 15:06:52 +0200 Subject: [PATCH 08/10] fix --- .../plugins/compute/resource_commitments_vmware_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go index eff29166f..e67fc0c74 100644 --- a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go +++ b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go @@ -136,9 +136,9 @@ func TestVMwareResourceCommitmentsKPI_CollectHanaUnusedCommitments(t *testing.T) } } - // project-A/az1: 2 unused hana_small (cascade-lake) → cpu=2×4=8, ram=2×16384=32768, disk=2×100=200 - // project-B/az1: 2 unused hana_large_v2 (sapphire-rapids) → cpu=2×16=32, ram=2×65536=131072, disk=2×400=800 - // project-A/az2: 3 unused hana_small (cascade-lake) → cpu=3×4=12, ram=3×16384=49152, disk=3×100=300 + // project-A/az1: 2 unused hana_small (cascade-lake) → cpu=2*4=8, ram=2*16384=32768, disk=2*100=200 + // project-B/az1: 2 unused hana_large_v2 (sapphire-rapids) → cpu=2*16=32, ram=2*65536=131072, disk=2*400=800 + // project-A/az2: 3 unused hana_small (cascade-lake) → cpu=3*4=12, ram=3*16384=49152, disk=3*100=300 // project-E/az1: 5 running > 2 committed hana_small → clamped to 0, no metric emitted // project-F/az2: 3 running == 3 committed hana_large_v2 → clamped to 0, no metric emitted expected := map[string]UnusedMetric{ From c1e44b59b23c6283ad1105d2e08bb0c9c4b852fb Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Wed, 15 Apr 2026 10:02:09 +0200 Subject: [PATCH 09/10] Add check for duplicate metric keys in VMware HANA unused commitments test --- .../kpis/plugins/compute/resource_commitments_vmware_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go index e67fc0c74..14cffed6d 100644 --- a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go +++ b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go @@ -128,6 +128,10 @@ func TestVMwareResourceCommitmentsKPI_CollectHanaUnusedCommitments(t *testing.T) labels[lbl.GetName()] = lbl.GetValue() } key := labels["resource"] + "/" + labels["availability_zone"] + "/" + labels["cpu_architecture"] + if _, exists := actual[key]; exists { + t.Fatalf("duplicate metric key %q (resource=%q, availability_zone=%q, cpu_architecture=%q)", + key, labels["resource"], labels["availability_zone"], labels["cpu_architecture"]) + } actual[key] = UnusedMetric{ Resource: labels["resource"], AZ: labels["availability_zone"], From 1aada2f53759728f611433911acf086794271795 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Thu, 16 Apr 2026 09:01:34 +0200 Subject: [PATCH 10/10] Implement functions to retrieve running HANA servers and flavors, and refactor unused capacity calculation logic --- .../compute/resource_commitments_vmware.go | 147 +++++++++++------ .../resource_commitments_vmware_test.go | 148 ++++++++++++++++-- 2 files changed, 234 insertions(+), 61 deletions(-) diff --git a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go index 1b96b3f2f..74cde06a0 100644 --- a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go +++ b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware.go @@ -52,54 +52,78 @@ func (k *VMwareResourceCommitmentsKPI) Collect(ch chan<- prometheus.Metric) { k.collectUnusedCommitments(ch) } -func (k *VMwareResourceCommitmentsKPI) collectUnusedCommitments(ch chan<- prometheus.Metric) { - if k.DB == nil { - return - } - // Load confirmed/guaranteed instance commitments. - var commitments []limes.Commitment - if _, err := k.DB.Select(&commitments, ` - SELECT * FROM `+limes.Commitment{}.TableName()+` - WHERE service_type = 'compute' - AND resource_name LIKE 'instances_%' - AND status IN ('confirmed', 'guaranteed') +// getRunningHANAServers loads all running HANA servers from the database. We consider a server "running" if its status is not DELETED or ERROR. +func (k *VMwareResourceCommitmentsKPI) getRunningHANAServers() ([]nova.Server, error) { + // Load running HANA servers (non-deleted, non-error). + var servers []nova.Server + if _, err := k.DB.Select(&servers, ` + SELECT * FROM `+nova.Server{}.TableName()+` + WHERE flavor_name LIKE 'hana_%' + AND status NOT IN ('DELETED', 'ERROR') `); err != nil { - slog.Error("unused_commitments: failed to load commitments", "err", err) - return + return nil, err } + return servers, nil +} - // Load flavors for capacity lookup. +// getFlavorsByName loads all flavors from the database and returns a map of flavor name to flavor struct for easy lookup. +func (k *VMwareResourceCommitmentsKPI) getFlavorsByName() (map[string]nova.Flavor, error) { var flavors []nova.Flavor if _, err := k.DB.Select(&flavors, "SELECT * FROM "+nova.Flavor{}.TableName()); err != nil { - slog.Error("unused_commitments: failed to load flavors", "err", err) - return + return nil, err } flavorsByName := make(map[string]nova.Flavor, len(flavors)) for _, flavor := range flavors { flavorsByName[flavor.Name] = flavor } + return flavorsByName, nil +} - // Load running HANA servers (non-deleted, non-error). - var servers []nova.Server - if _, err := k.DB.Select(&servers, ` - SELECT * FROM `+nova.Server{}.TableName()+` - WHERE flavor_name LIKE 'hana_%' - AND status NOT IN ('DELETED', 'ERROR') +// getInstanceCommitments loads all confirmed or guaranteed instance commitments from the database. +func (k *VMwareResourceCommitmentsKPI) getInstanceCommitments() ([]limes.Commitment, error) { + var commitments []limes.Commitment + if _, err := k.DB.Select(&commitments, ` + SELECT * FROM `+limes.Commitment{}.TableName()+` + WHERE service_type = 'compute' + AND resource_name LIKE 'instances_%' + AND status IN ('confirmed', 'guaranteed') `); err != nil { - slog.Error("unused_commitments: failed to load servers", "err", err) - return + return nil, err + } + return commitments, nil +} + +// cpuArchitectureForFlavor returns the CPU architecture label for a HANA flavor name. +// Flavors with a "_v2" suffix run on sapphire-rapids; all others are cascade-lake. +func cpuArchitectureForFlavor(flavorName string) string { + if strings.HasSuffix(flavorName, "_v2") { + return "sapphire-rapids" } - // runningCount: (project_id, flavor_name, az) -> count - type serverKey struct{ projectID, flavorName, az string } - runningCount := make(map[serverKey]uint64) - for _, server := range servers { - key := serverKey{server.TenantID, server.FlavorName, server.OSEXTAvailabilityZone} - runningCount[key]++ + return "cascade-lake" +} + +// resourceKey identifies an aggregated capacity bucket by (resource, az, architecture). +type resourceKey struct{ resource, az, architecture string } + +// calculateUnusedInstanceCapacity computes per-(resource, az, architecture) capacity sums for unused +// HANA VMware commitments. It filters out non-HANA and KVM (hana_k_) commitments, then for each +// (project, flavor, az, architecture) bucket subtracts running servers from committed amount; over-used +// buckets are clamped to zero and omitted from the result. +func calculateUnusedInstanceCapacity( + commitments []limes.Commitment, + servers []nova.Server, + flavorsByName map[string]nova.Flavor, +) map[resourceKey]float64 { + // running: (project_id, flavor_name, az) -> count of non-deleted/non-error servers. + type serverCountKey struct{ projectID, flavorName, az string } + running := make(map[serverCountKey]uint64, len(servers)) + for _, s := range servers { + running[serverCountKey{s.TenantID, s.FlavorName, s.OSEXTAvailabilityZone}]++ } - // committed: (project_id, flavor_name, az, cpuArchitecture) -> total committed amount - type commitKey struct{ projectID, flavorName, az, cpuArchitecture string } - committed := make(map[commitKey]uint64) + // committed: (project_id, flavor_name, az, cpuArchitecture) -> total committed amount. + type commitmentKey struct{ projectID, flavorName, az, cpuArchitecture string } + committed := make(map[commitmentKey]uint64) for _, c := range commitments { flavorName := strings.TrimPrefix(c.ResourceName, "instances_") if !strings.HasPrefix(flavorName, "hana_") { @@ -109,36 +133,57 @@ func (k *VMwareResourceCommitmentsKPI) collectUnusedCommitments(ch chan<- promet slog.Debug("unused_commitments: skipping hana kvm commitment", "flavor", flavorName, "project_id", c.ProjectID) continue } - cpuArchitecture := "cascade-lake" - if strings.HasSuffix(flavorName, "_v2") { - cpuArchitecture = "sapphire-rapids" - } - key := commitKey{c.ProjectID, flavorName, c.AvailabilityZone, cpuArchitecture} + key := commitmentKey{c.ProjectID, flavorName, c.AvailabilityZone, cpuArchitectureForFlavor(flavorName)} committed[key] += c.Amount } - // For each (project, flavor, az, arch): unused = max(0, committed - running). - // Accumulate capacity into sumByResource: (resource, az, arch) -> value. - type resourceKey struct{ resource, az, arch string } - sumByResource := make(map[resourceKey]float64) + sum := make(map[resourceKey]float64) for ck, total := range committed { - sk := serverKey{ck.projectID, ck.flavorName, ck.az} - running := runningCount[sk] - - if running >= total { + run := running[serverCountKey{ck.projectID, ck.flavorName, ck.az}] + if run >= total { continue } - unused := total - running + unused := total - run flavor, ok := flavorsByName[ck.flavorName] if !ok { slog.Warn("unused_commitments: flavor not found in flavor table", "flavor", ck.flavorName) continue } - sumByResource[resourceKey{"cpu", ck.az, ck.cpuArchitecture}] += float64(unused) * float64(flavor.VCPUs) - sumByResource[resourceKey{"ram", ck.az, ck.cpuArchitecture}] += float64(unused) * float64(flavor.RAM) - sumByResource[resourceKey{"disk", ck.az, ck.cpuArchitecture}] += float64(unused) * float64(flavor.Disk) + sum[resourceKey{"cpu", ck.az, ck.cpuArchitecture}] += float64(unused) * float64(flavor.VCPUs) + sum[resourceKey{"ram", ck.az, ck.cpuArchitecture}] += float64(unused) * float64(flavor.RAM) + sum[resourceKey{"disk", ck.az, ck.cpuArchitecture}] += float64(unused) * float64(flavor.Disk) + } + return sum +} + +func (k *VMwareResourceCommitmentsKPI) collectUnusedCommitments(ch chan<- prometheus.Metric) { + if k.DB == nil { + return + } + + // Load confirmed/guaranteed instance commitments. + commitments, err := k.getInstanceCommitments() + if err != nil { + slog.Error("unused_commitments: failed to load commitments", "err", err) + return + } + + // Load flavors for capacity lookup. + flavorsByName, err := k.getFlavorsByName() + if err != nil { + slog.Error("unused_commitments: failed to load flavors", "err", err) + return } + // Load running HANA servers. + servers, err := k.getRunningHANAServers() + if err != nil { + slog.Error("unused_commitments: failed to get running HANA servers", "err", err) + return + } + + sumByResource := calculateUnusedInstanceCapacity(commitments, servers, flavorsByName) + for rk, value := range sumByResource { ch <- prometheus.MustNewConstMetric( k.unusedInstanceCommitments, @@ -146,7 +191,7 @@ func (k *VMwareResourceCommitmentsKPI) collectUnusedCommitments(ch chan<- promet value, rk.resource, rk.az, - rk.arch, + rk.architecture, ) } } diff --git a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go index 14cffed6d..90a1abd3b 100644 --- a/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go +++ b/internal/knowledge/kpis/plugins/compute/resource_commitments_vmware_test.go @@ -19,6 +19,134 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +func TestCPUArchitectureForFlavor(t *testing.T) { + tests := []struct { + flavorName string + want string + }{ + {"hana_small", "cascade-lake"}, + {"hana_large", "cascade-lake"}, + {"hana_small_v2", "sapphire-rapids"}, + {"hana_large_v2", "sapphire-rapids"}, + {"hana_v2_extra", "cascade-lake"}, // _v2 must be a suffix + {"hana_x_v2", "sapphire-rapids"}, + } + for _, tt := range tests { + t.Run(tt.flavorName, func(t *testing.T) { + got := cpuArchitectureForFlavor(tt.flavorName) + if got != tt.want { + t.Errorf("cpuArchitectureForFlavor(%q) = %q, want %q", tt.flavorName, got, tt.want) + } + }) + } +} + +func TestCalculateUnusedInstanceCapacity(t *testing.T) { + flavors := map[string]nova.Flavor{ + "hana_small": {VCPUs: 4, RAM: 16384, Disk: 100}, + "hana_large_v2": {VCPUs: 16, RAM: 65536, Disk: 400}, + } + + t.Run("basic unused", func(t *testing.T) { + commitments := []limes.Commitment{ + {ProjectID: "p1", ResourceName: "instances_hana_small", AvailabilityZone: "az1", Amount: 3}, + } + servers := []nova.Server{ + {TenantID: "p1", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1"}, // 1 running -> 2 unused + } + got := calculateUnusedInstanceCapacity(commitments, servers, flavors) + + if got[resourceKey{"cpu", "az1", "cascade-lake"}] != 8 { // 2 * 4 + t.Errorf("expected cpu=8, got %v", got[resourceKey{"cpu", "az1", "cascade-lake"}]) + } + if got[resourceKey{"ram", "az1", "cascade-lake"}] != 32768 { // 2 * 16384 + t.Errorf("expected ram=32768, got %v", got[resourceKey{"ram", "az1", "cascade-lake"}]) + } + if got[resourceKey{"disk", "az1", "cascade-lake"}] != 200 { // 2 * 100 + t.Errorf("expected disk=200, got %v", got[resourceKey{"disk", "az1", "cascade-lake"}]) + } + }) + + t.Run("non-hana and kvm commitments are skipped", func(t *testing.T) { + commitments := []limes.Commitment{ + {ProjectID: "p1", ResourceName: "instances_hana_k_foo", AvailabilityZone: "az1", Amount: 5}, + {ProjectID: "p2", ResourceName: "instances_general_medium", AvailabilityZone: "az1", Amount: 3}, + } + got := calculateUnusedInstanceCapacity(commitments, nil, flavors) + if len(got) != 0 { + t.Errorf("expected no metrics for kvm/non-hana commitments, got %v", got) + } + }) + + t.Run("amounts for the same key are summed", func(t *testing.T) { + commitments := []limes.Commitment{ + {ProjectID: "p1", ResourceName: "instances_hana_small", AvailabilityZone: "az1", Amount: 3}, + {ProjectID: "p1", ResourceName: "instances_hana_small", AvailabilityZone: "az1", Amount: 2}, + } + got := calculateUnusedInstanceCapacity(commitments, nil, flavors) // nil servers -> all unused + if got[resourceKey{"cpu", "az1", "cascade-lake"}] != 20 { // 5 * 4 + t.Errorf("expected cpu=20 for summed commitments, got %v", got[resourceKey{"cpu", "az1", "cascade-lake"}]) + } + }) + + t.Run("over-used bucket emits no metric", func(t *testing.T) { + commitments := []limes.Commitment{ + {ProjectID: "p1", ResourceName: "instances_hana_small", AvailabilityZone: "az1", Amount: 2}, + } + servers := []nova.Server{ // 5 running > 2 committed + {TenantID: "p1", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1"}, + {TenantID: "p1", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1"}, + {TenantID: "p1", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1"}, + {TenantID: "p1", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1"}, + {TenantID: "p1", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1"}, + } + got := calculateUnusedInstanceCapacity(commitments, servers, flavors) + if len(got) != 0 { + t.Errorf("expected no metrics for over-used bucket, got %v", got) + } + }) + + t.Run("exactly-used bucket emits no metric", func(t *testing.T) { + commitments := []limes.Commitment{ + {ProjectID: "p1", ResourceName: "instances_hana_small", AvailabilityZone: "az1", Amount: 3}, + } + servers := []nova.Server{ // 3 running == 3 committed + {TenantID: "p1", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1"}, + {TenantID: "p1", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1"}, + {TenantID: "p1", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1"}, + } + got := calculateUnusedInstanceCapacity(commitments, servers, flavors) + if len(got) != 0 { + t.Errorf("expected no metrics for fully-used bucket, got %v", got) + } + }) + + t.Run("unknown flavor is skipped", func(t *testing.T) { + commitments := []limes.Commitment{ + {ProjectID: "p1", ResourceName: "instances_hana_unknown", AvailabilityZone: "az1", Amount: 3}, + } + got := calculateUnusedInstanceCapacity(commitments, nil, flavors) + if len(got) != 0 { + t.Errorf("expected no metrics for unknown flavor, got %v", got) + } + }) + + t.Run("multiple keys aggregated correctly", func(t *testing.T) { + commitments := []limes.Commitment{ + {ProjectID: "p1", ResourceName: "instances_hana_small", AvailabilityZone: "az1", Amount: 2}, + {ProjectID: "p2", ResourceName: "instances_hana_large_v2", AvailabilityZone: "az1", Amount: 1}, + } + got := calculateUnusedInstanceCapacity(commitments, nil, flavors) // nil running -> all unused + + if got[resourceKey{"cpu", "az1", "cascade-lake"}] != 8 { // 2 * 4 + t.Errorf("expected cpu cascade-lake=8, got %v", got[resourceKey{"cpu", "az1", "cascade-lake"}]) + } + if got[resourceKey{"cpu", "az1", "sapphire-rapids"}] != 16 { // 1 * 16 + t.Errorf("expected cpu sapphire-rapids=16, got %v", got[resourceKey{"cpu", "az1", "sapphire-rapids"}]) + } + }) +} + func TestVMwareResourceCommitmentsKPI_CollectHanaUnusedCommitments(t *testing.T) { scheme, err := v1alpha1.SchemeBuilder.Build() if err != nil { @@ -70,11 +198,11 @@ func TestVMwareResourceCommitmentsKPI_CollectHanaUnusedCommitments(t *testing.T) } // Running servers: - // project-A/az1: 1 hana_small ACTIVE, 1 DELETED (ignored) → 2 unused in az1 - // project-B/az1: 0 hana_large_v2 → 2 unused in az1 - // project-A/az2: 1 hana_small ACTIVE → 3 unused in az2 - // project-E/az1: 5 hana_small ACTIVE → 5 > 2 committed → 0 unused (over-used, clamped) - // project-F/az2: 3 hana_large_v2 ACTIVE → 3 == 3 committed → 0 unused (fully used, clamped) + // project-A/az1: 1 hana_small ACTIVE, 1 DELETED (ignored) -> 2 unused in az1 + // project-B/az1: 0 hana_large_v2 -> 2 unused in az1 + // project-A/az2: 1 hana_small ACTIVE -> 3 unused in az2 + // project-E/az1: 5 hana_small ACTIVE -> 5 > 2 committed -> 0 unused (over-used, clamped) + // project-F/az2: 3 hana_large_v2 ACTIVE -> 3 == 3 committed -> 0 unused (fully used, clamped) if err := testDB.Insert( &nova.Server{ID: "s1", TenantID: "project-A", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1", Status: "ACTIVE"}, &nova.Server{ID: "s2", TenantID: "project-A", FlavorName: "hana_small", OSEXTAvailabilityZone: "az1", Status: "DELETED"}, @@ -140,11 +268,11 @@ func TestVMwareResourceCommitmentsKPI_CollectHanaUnusedCommitments(t *testing.T) } } - // project-A/az1: 2 unused hana_small (cascade-lake) → cpu=2*4=8, ram=2*16384=32768, disk=2*100=200 - // project-B/az1: 2 unused hana_large_v2 (sapphire-rapids) → cpu=2*16=32, ram=2*65536=131072, disk=2*400=800 - // project-A/az2: 3 unused hana_small (cascade-lake) → cpu=3*4=12, ram=3*16384=49152, disk=3*100=300 - // project-E/az1: 5 running > 2 committed hana_small → clamped to 0, no metric emitted - // project-F/az2: 3 running == 3 committed hana_large_v2 → clamped to 0, no metric emitted + // project-A/az1: 2 unused hana_small (cascade-lake) -> cpu=2*4=8, ram=2*16384=32768, disk=2*100=200 + // project-B/az1: 2 unused hana_large_v2 (sapphire-rapids) -> cpu=2*16=32, ram=2*65536=131072, disk=2*400=800 + // project-A/az2: 3 unused hana_small (cascade-lake) -> cpu=3*4=12, ram=3*16384=49152, disk=3*100=300 + // project-E/az1: 5 running > 2 committed hana_small -> clamped to 0, no metric emitted + // project-F/az2: 3 running == 3 committed hana_large_v2 -> clamped to 0, no metric emitted expected := map[string]UnusedMetric{ "cpu/az1/cascade-lake": {Resource: "cpu", AZ: "az1", Arch: "cascade-lake", Value: 8}, "ram/az1/cascade-lake": {Resource: "ram", AZ: "az1", Arch: "cascade-lake", Value: 32768},