From 082cc4404496d3f8d85225150b8e8f72d1483400 Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Wed, 6 May 2026 10:16:53 +0200
Subject: [PATCH 1/3] feat: add kvm project usage metrics

---
 .../infrastructure/kvm_project_utilization.go | 205 +++++
 .../kvm_project_utilization_test.go           | 784 ++++++++++++++++++
 .../kpis/plugins/infrastructure/shared.go     |  68 ++
 .../plugins/infrastructure/shared_test.go     | 108 ++-
 .../vmware_project_utilization.go             |  15 +-
 5 files changed, 1166 insertions(+), 14 deletions(-)
 create mode 100644 internal/knowledge/kpis/plugins/infrastructure/kvm_project_utilization.go
 create mode 100644 internal/knowledge/kpis/plugins/infrastructure/kvm_project_utilization_test.go

diff --git a/internal/knowledge/kpis/plugins/infrastructure/kvm_project_utilization.go b/internal/knowledge/kpis/plugins/infrastructure/kvm_project_utilization.go
new file mode 100644
index 000000000..c4a3c623e
--- /dev/null
+++ b/internal/knowledge/kpis/plugins/infrastructure/kvm_project_utilization.go
@@ -0,0 +1,205 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package infrastructure
+
+import (
+	"context"
+	"log/slog"
+
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/identity"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/nova"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/db"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/kpis/plugins"
+	"github.com/cobaltcore-dev/cortex/pkg/conf"
+	hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
+	"github.com/prometheus/client_golang/prometheus"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// kvmProjectInstanceCount is one row of the instance-count query: the number
+// of instances a project runs on one compute host with one flavor in one AZ.
+type kvmProjectInstanceCount struct {
+	ProjectID        string  `db:"project_id"`
+	ProjectName      string  `db:"project_name"`
+	DomainID         string  `db:"domain_id"`
+	DomainName       string  `db:"domain_name"`
+	ComputeHost      string  `db:"compute_host"`
+	FlavorName       string  `db:"flavor_name"`
+	AvailabilityZone string  `db:"availability_zone"`
+	InstanceCount    float64 `db:"instance_count"`
+}
+
+// kvmProjectCapacityUsage is one row of the capacity-usage query: the summed
+// flavor resources (vCPUs, RAM in MB, disk in GB) a project consumes on one
+// compute host in one AZ.
+type kvmProjectCapacityUsage struct {
+	ProjectID        string  `db:"project_id"`
+	ProjectName      string  `db:"project_name"`
+	DomainID         string  `db:"domain_id"`
+	DomainName       string  `db:"domain_name"`
+	ComputeHost      string  `db:"compute_host"`
+	AvailabilityZone string  `db:"availability_zone"`
+	TotalVCPUs       float64 `db:"total_vcpus"`
+	TotalRAMMB       float64 `db:"total_ram_mb"`
+	TotalDiskGB      float64 `db:"total_disk_gb"`
+}
+
+// KVMProjectUtilizationKPI exports per-project usage metrics for KVM
+// hypervisors: an instance count per project/host/flavor and a capacity-usage
+// gauge per project/host/resource.
+type KVMProjectUtilizationKPI struct {
+	// BaseKPI provides common fields and methods for all KPIs, such as database connection and Kubernetes client.
+	plugins.BaseKPI[struct{}]
+
+	// instanceCountPerProjectAndHostAndFlavor is a Prometheus descriptor for the metric that counts the number of instances per project, host, and flavor.
+	instanceCountPerProjectAndHostAndFlavor *prometheus.Desc
+
+	// capacityUsagePerProjectAndHost is a Prometheus descriptor for the metric that measures the capacity usage per project and host.
+	capacityUsagePerProjectAndHost *prometheus.Desc
+}
+
+// GetName returns the unique name of this KPI plugin.
+func (k *KVMProjectUtilizationKPI) GetName() string {
+	return "kvm_project_utilization_kpi"
+}
+
+// kvmJoinLabels returns a fresh slice containing base followed by extra.
+// A new backing array is allocated on purpose: the result is handed to
+// prometheus (NewDesc / MustNewConstMetric), and building several label sets
+// by appending to one shared slice would alias a single backing array.
+func kvmJoinLabels(base []string, extra ...string) []string {
+	out := make([]string, 0, len(base)+len(extra))
+	out = append(out, base...)
+	return append(out, extra...)
+}
+
+// Init wires up the database connection and Kubernetes client via BaseKPI and
+// registers the Prometheus descriptors for both metrics.
+func (k *KVMProjectUtilizationKPI) Init(dbConn *db.DB, c client.Client, opts conf.RawOpts) error {
+	if err := k.BaseKPI.Init(dbConn, c, opts); err != nil {
+		return err
+	}
+
+	k.instanceCountPerProjectAndHostAndFlavor = prometheus.NewDesc(
+		"cortex_kvm_project_instances",
+		"Number of running instances per project, hypervisor, and flavor on KVM.",
+		kvmJoinLabels(kvmHostLabels, "project_id", "project_name", "domain_id", "domain_name", "flavor_name"), nil,
+	)
+	// Note: this metric carries no flavor label; it is aggregated per project
+	// and hypervisor, with one time series per resource kind.
+	k.capacityUsagePerProjectAndHost = prometheus.NewDesc(
+		"cortex_kvm_project_capacity_usage",
+		"Resource capacity used by a project per KVM hypervisor. CPU in vCPUs, memory and disk in bytes.",
+		kvmJoinLabels(kvmHostLabels, "project_id", "project_name", "domain_id", "domain_name", "resource"), nil,
+	)
+	return nil
+}
+
+// Describe sends the descriptors of all metrics exported by this KPI.
+func (k *KVMProjectUtilizationKPI) Describe(ch chan<- *prometheus.Desc) {
+	ch <- k.instanceCountPerProjectAndHostAndFlavor
+	ch <- k.capacityUsagePerProjectAndHost
+}
+
+// Collect queries the knowledge database and emits both metrics. Rows whose
+// compute host has no matching Hypervisor resource are skipped with a warning.
+func (k *KVMProjectUtilizationKPI) Collect(ch chan<- prometheus.Metric) {
+	hosts, err := k.getKVMHosts()
+	if err != nil {
+		slog.Error("kvm_project_utilization: failed to get KVM hosts", "error", err)
+		return
+	}
+
+	// Export project x flavor x compute_host instance count metric
+	projectInstanceCounts, err := k.queryProjectInstanceCount()
+	if err != nil {
+		slog.Error("kvm_project_utilization: Failed to query project instance count for project utilization KPI", "error", err)
+		return
+	}
+	for _, pic := range projectInstanceCounts {
+		host, ok := hosts[pic.ComputeHost]
+		if !ok {
+			slog.Warn("kvm_project_utilization: Compute host not found for project instance count", "compute_host", pic.ComputeHost)
+			continue
+		}
+		labels := kvmJoinLabels(host.getHostLabels(), pic.ProjectID, pic.ProjectName, pic.DomainID, pic.DomainName, pic.FlavorName)
+		ch <- prometheus.MustNewConstMetric(k.instanceCountPerProjectAndHostAndFlavor, prometheus.GaugeValue, pic.InstanceCount, labels...)
+	}
+
+	// Export project x compute_host x resource capacity usage metric
+	projectCapacityUsages, err := k.queryProjectCapacityUsage()
+	if err != nil {
+		slog.Error("kvm_project_utilization: Failed to query project capacity usage for project utilization KPI", "error", err)
+		return
+	}
+	for _, pcu := range projectCapacityUsages {
+		host, ok := hosts[pcu.ComputeHost]
+		if !ok {
+			slog.Warn("kvm_project_utilization: Compute host not found for project capacity usage", "compute_host", pcu.ComputeHost)
+			continue
+		}
+		base := kvmJoinLabels(host.getHostLabels(), pcu.ProjectID, pcu.ProjectName, pcu.DomainID, pcu.DomainName)
+
+		// One time series per resource kind. RAM is stored in MB and disk in
+		// GB in the database; both are exported in bytes. Each metric gets its
+		// own label slice copy so no backing array is shared between them.
+		for _, r := range []struct {
+			name  string
+			value float64
+		}{
+			{"vcpu", pcu.TotalVCPUs},
+			{"memory", pcu.TotalRAMMB * 1024 * 1024},
+			{"disk", pcu.TotalDiskGB * 1024 * 1024 * 1024},
+		} {
+			ch <- prometheus.MustNewConstMetric(k.capacityUsagePerProjectAndHost, prometheus.GaugeValue, r.value, kvmJoinLabels(base, r.name)...)
+		}
+	}
+}
+
+// getKVMHosts retrieves the list of KVM hosts and their details from the database, returning a map keyed by compute host name.
+func (k *KVMProjectUtilizationKPI) getKVMHosts() (map[string]kvmHost, error) {
+	hvs := &hv1.HypervisorList{}
+	if err := k.Client.List(context.Background(), hvs); err != nil {
+		return nil, err
+	}
+
+	hosts := make(map[string]kvmHost, len(hvs.Items))
+	for _, hv := range hvs.Items {
+		host := kvmHost{Hypervisor: hv}
+		hosts[host.Name] = host
+	}
+	return hosts, nil
+}
+
+// queryProjectCapacityUsage retrieves the total vCPU, RAM, and disk capacity
+// consumed per project, hypervisor, and availability zone on KVM.
+func (k *KVMProjectUtilizationKPI) queryProjectCapacityUsage() ([]kvmProjectCapacityUsage, error) {
+	// This query fetches all instances that are not DELETED or ERROR. It joins the openstack projects to get the project name.
+	// It also joins the flavors table to get the flavor information, which is needed for the capacity usage metrics.
+	// The results are grouped by project, compute host, and availability zone to get the total capacity usage per project and hypervisor.
+	// We filter the results to only include instances that are running on KVM hypervisors by checking the compute host name pattern.
+	// This assumes that all KVM hypervisors have a compute host name that follows the pattern "nodeXXX-bbYYY",
+	// which is a naming convention in SAP Cloud Infrastructure and may need to be adjusted based on the actual environment.
+	// kvmComputeHostPattern is a package constant, not user input, so the string concatenation below is safe.
+	query := `
+		SELECT
+			s.tenant_id AS project_id,
+			COALESCE(p.name, '') AS project_name,
+			COALESCE(p.domain_id, '') AS domain_id,
+			COALESCE(d.name, '') AS domain_name,
+			s.os_ext_srv_attr_host AS compute_host,
+			s.os_ext_az_availability_zone AS availability_zone,
+			COALESCE(SUM(f.vcpus), 0) AS total_vcpus,
+			COALESCE(SUM(f.ram), 0) AS total_ram_mb,
+			COALESCE(SUM(f.disk), 0) AS total_disk_gb
+		FROM ` + nova.Server{}.TableName() + ` s
+		LEFT JOIN ` + nova.Flavor{}.TableName() + ` f ON s.flavor_name = f.name
+		LEFT JOIN ` + identity.Project{}.TableName() + ` p ON p.id = s.tenant_id
+		LEFT JOIN ` + identity.Domain{}.TableName() + ` d ON d.id = p.domain_id
+		WHERE s.status NOT IN ('DELETED', 'ERROR')
+		AND s.os_ext_srv_attr_host LIKE '` + kvmComputeHostPattern + `'
+		GROUP BY s.tenant_id, p.name, p.domain_id, d.name, s.os_ext_srv_attr_host, s.os_ext_az_availability_zone
+	`
+	var usages []kvmProjectCapacityUsage
+	if _, err := k.DB.Select(&usages, query); err != nil {
+		return nil, err
+	}
+	return usages, nil
+}
+
+// queryProjectInstanceCount retrieves the number of running instances per project, hypervisor, and flavor on KVM.
+func (k *KVMProjectUtilizationKPI) queryProjectInstanceCount() ([]kvmProjectInstanceCount, error) {
+	// This query fetches all instances that are not DELETED or ERROR. It joins the openstack projects to get the project name.
+	// The results are grouped by project, hypervisor, flavor, and availability zone to get the instance count.
+	// We filter the results to only include instances that are running on KVM hypervisors by checking the compute host name pattern.
+	// This assumes that all KVM hypervisors have a compute host name that follows the pattern "nodeXXX-bbYYY",
+	// which is a naming convention in SAP Cloud Infrastructure and may need to be adjusted based on the actual environment.
+	query := `
+		SELECT
+			s.tenant_id AS project_id,
+			COALESCE(p.name, '') AS project_name,
+			COALESCE(p.domain_id, '') AS domain_id,
+			COALESCE(d.name, '') AS domain_name,
+			s.os_ext_srv_attr_host AS compute_host,
+			s.os_ext_az_availability_zone AS availability_zone,
+			s.flavor_name,
+			COUNT(*) AS instance_count
+		FROM ` + nova.Server{}.TableName() + ` s
+		LEFT JOIN ` + identity.Project{}.TableName() + ` p ON p.id = s.tenant_id
+		LEFT JOIN ` + identity.Domain{}.TableName() + ` d ON d.id = p.domain_id
+		WHERE s.status NOT IN ('DELETED', 'ERROR')
+		AND s.os_ext_srv_attr_host LIKE '` + kvmComputeHostPattern + `'
+		GROUP BY s.tenant_id, p.name, p.domain_id, d.name, s.os_ext_srv_attr_host, s.flavor_name, s.os_ext_az_availability_zone
+	`
+	var usages []kvmProjectInstanceCount
+	if _, err := k.DB.Select(&usages, query); err != nil {
+		return nil, err
+	}
+	return usages, nil
+}
diff --git a/internal/knowledge/kpis/plugins/infrastructure/kvm_project_utilization_test.go b/internal/knowledge/kpis/plugins/infrastructure/kvm_project_utilization_test.go
new file mode 100644
index 000000000..68565e4b3
--- /dev/null
+++ b/internal/knowledge/kpis/plugins/infrastructure/kvm_project_utilization_test.go
@@ -0,0 +1,784 @@
+// Copyright SAP SE
+// SPDX-License-Identifier:
Apache-2.0
+
+package infrastructure
+
+import (
+	"reflect"
+	"strings"
+	"testing"
+
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/identity"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/nova"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/db"
+	testlibDB "github.com/cobaltcore-dev/cortex/internal/knowledge/db/testing"
+	"github.com/cobaltcore-dev/cortex/pkg/conf"
+	hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
+	"github.com/prometheus/client_golang/prometheus"
+	prometheusgo "github.com/prometheus/client_model/go"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+)
+
+// collectedKVMMetric is a flattened view of one gathered metric sample,
+// used to compare expected against collected output in the tests below.
+type collectedKVMMetric struct {
+	Name   string
+	Labels map[string]string
+	Value  float64
+}
+
+// mockKVMHostLabels builds the full host label set the KPI attaches to every
+// metric, deriving the building block from the "nodeXXX-bbYYY" host name.
+func mockKVMHostLabels(computeHost, az string) map[string]string {
+	bb := "unknown"
+	if segments := strings.SplitN(computeHost, "-", 3); len(segments) > 1 {
+		bb = segments[1]
+	}
+	return map[string]string{
+		"compute_host":      computeHost,
+		"availability_zone": az,
+		"building_block":    bb,
+		"cpu_architecture":  "cascade-lake",
+		"workload_type":     "general-purpose",
+		"enabled":           "true",
+		"decommissioned":    "false",
+		"external_customer": "false",
+		"maintenance":       "false",
+	}
+}
+
+// buildKVMMetricKey derives a stable lookup key for a collected metric from
+// the subset of labels that uniquely identifies its time series in the tests.
+func buildKVMMetricKey(name string, labels map[string]string) string {
+	switch name {
+	case "cortex_kvm_project_instances":
+		return strings.Join([]string{
+			name, labels["compute_host"], labels["project_id"],
+			labels["flavor_name"], labels["availability_zone"],
+		}, "|")
+	case "cortex_kvm_project_capacity_usage":
+		return strings.Join([]string{
+			name, labels["compute_host"], labels["project_id"],
+			labels["availability_zone"], labels["resource"],
+		}, "|")
+	}
+	return name
+}
+
+// kvmInstanceMetric constructs the expected instance-count metric sample.
+func kvmInstanceMetric(computeHost, az, projectID, projectName, domainID, domainName, flavorName string, value float64) collectedKVMMetric {
+	labels := mockKVMHostLabels(computeHost, az)
+	for key, val := range map[string]string{
+		"project_id":   projectID,
+		"project_name": projectName,
+		"domain_id":    domainID,
+		"domain_name":  domainName,
+		"flavor_name":  flavorName,
+	} {
+		labels[key] = val
+	}
+	return collectedKVMMetric{Name: "cortex_kvm_project_instances", Labels: labels, Value: value}
+}
+
+// kvmCapacityMetric constructs the expected capacity-usage metric sample.
+func kvmCapacityMetric(computeHost, az, projectID, projectName, domainID, domainName, resource string, value float64) collectedKVMMetric {
+	labels := mockKVMHostLabels(computeHost, az)
+	for key, val := range map[string]string{
+		"project_id":   projectID,
+		"project_name": projectName,
+		"domain_id":    domainID,
+		"domain_name":  domainName,
+		"resource":     resource,
+	} {
+		labels[key] = val
+	}
+	return collectedKVMMetric{Name: "cortex_kvm_project_capacity_usage", Labels: labels, Value: value}
+}
+
+// buildKVMHypervisorClient prepares a fake controller-runtime client builder
+// preloaded with the given Hypervisor objects.
+func buildKVMHypervisorClient(t *testing.T, hvs []hv1.Hypervisor) *fake.ClientBuilder {
+	t.Helper()
+	scheme := runtime.NewScheme()
+	if err := hv1.AddToScheme(scheme); err != nil {
+		t.Fatalf("failed to add hv1 scheme: %v", err)
+	}
+	objs := make([]runtime.Object, 0, len(hvs))
+	for i := range hvs {
+		objs = append(objs, &hvs[i])
+	}
+	return fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(objs...)
+} + +func TestKVMProjectUtilizationKPI_Init(t *testing.T) { + dbEnv := testlibDB.SetupDBEnv(t) + testDB := db.DB{DbMap: dbEnv.DbMap} + defer dbEnv.Close() + kpi := &KVMProjectUtilizationKPI{} + if err := kpi.Init(&testDB, nil, conf.NewRawOpts("{}")); err != nil { + t.Fatalf("expected no error, got %v", err) + } +} + +func TestKVMProjectUtilizationKPI_getKVMHosts(t *testing.T) { + hvs := []hv1.Hypervisor{ + {ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node002-bb01"}}, + } + + clientBuilder := buildKVMHypervisorClient(t, hvs) + kpi := &KVMProjectUtilizationKPI{} + kpi.Client = clientBuilder.Build() + + hostMapping, err := kpi.getKVMHosts() + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if len(hostMapping) != len(hvs) { + t.Fatalf("expected %d hosts, got %d", len(hvs), len(hostMapping)) + } + for _, hv := range hvs { + host, ok := hostMapping[hv.Name] + if !ok { + t.Fatalf("expected host %s not found in mapping", hv.Name) + } + if host.Name != hv.Name { + t.Errorf("host name mismatch: expected %s, got %s", hv.Name, host.Name) + } + } +} + +func TestKVMProjectUtilizationKPI_queryProjectInstanceCount(t *testing.T) { + tests := []struct { + name string + servers []nova.Server + projects []identity.Project + domains []identity.Domain + expectedCounts map[string]kvmProjectInstanceCount + }{ + { + name: "single instance in one project", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + expectedCounts: map[string]kvmProjectInstanceCount{ + "project-1|node001-bb01|flavor-1|az1": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node001-bb01", 
FlavorName: "flavor-1", AvailabilityZone: "az1", InstanceCount: 1}, + }, + }, + { + name: "multiple instances across projects and hosts", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "server-2", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-2", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "server-3", TenantID: "project-2", OSEXTSRVATTRHost: "node002-bb02", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az2"}, + {ID: "server-4", TenantID: "project-2", OSEXTSRVATTRHost: "node002-bb02", FlavorName: "flavor-2", Status: "ACTIVE", OSEXTAvailabilityZone: "az2"}, + }, + projects: []identity.Project{ + {ID: "project-1", Name: "Project One", DomainID: "domain-1"}, + {ID: "project-2", Name: "Project Two", DomainID: "domain-1"}, + }, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + expectedCounts: map[string]kvmProjectInstanceCount{ + "project-1|node001-bb01|flavor-1|az1": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node001-bb01", FlavorName: "flavor-1", AvailabilityZone: "az1", InstanceCount: 1}, + "project-1|node001-bb01|flavor-2|az1": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node001-bb01", FlavorName: "flavor-2", AvailabilityZone: "az1", InstanceCount: 1}, + "project-2|node002-bb02|flavor-1|az2": {ProjectID: "project-2", ProjectName: "Project Two", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node002-bb02", FlavorName: "flavor-1", AvailabilityZone: "az2", InstanceCount: 1}, + "project-2|node002-bb02|flavor-2|az2": {ProjectID: "project-2", ProjectName: "Project Two", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node002-bb02", FlavorName: "flavor-2", AvailabilityZone: "az2", 
InstanceCount: 1}, + }, + }, + { + name: "instances on non-KVM hosts are excluded", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "server-2", TenantID: "project-1", OSEXTSRVATTRHost: "nova-compute-1", FlavorName: "flavor-2", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + expectedCounts: map[string]kvmProjectInstanceCount{ + "project-1|node001-bb01|flavor-1|az1": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node001-bb01", FlavorName: "flavor-1", AvailabilityZone: "az1", InstanceCount: 1}, + }, + }, + { + name: "instances with non-ACTIVE status are excluded", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "DELETED", OSEXTAvailabilityZone: "az1"}, + {ID: "server-2", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-2", Status: "ERROR", OSEXTAvailabilityZone: "az1"}, + {ID: "server-3", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-3", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + expectedCounts: map[string]kvmProjectInstanceCount{ + "project-1|node001-bb01|flavor-3|az1": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node001-bb01", FlavorName: "flavor-3", AvailabilityZone: "az1", InstanceCount: 1}, + }, + }, + { + name: "multiple instances with same key are counted correctly", + servers: []nova.Server{ + {ID: "server-1", 
TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "server-2", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "server-3", TenantID: "project-1", OSEXTSRVATTRHost: "node002-bb02", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az2"}, + {ID: "server-4", TenantID: "project-1", OSEXTSRVATTRHost: "node002-bb02", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az2"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + expectedCounts: map[string]kvmProjectInstanceCount{ + "project-1|node001-bb01|flavor-1|az1": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node001-bb01", FlavorName: "flavor-1", AvailabilityZone: "az1", InstanceCount: 2}, + "project-1|node002-bb02|flavor-1|az2": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node002-bb02", FlavorName: "flavor-1", AvailabilityZone: "az2", InstanceCount: 2}, + }, + }, + { + name: "project references non-existent domain results in empty domain fields", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-unknown"}}, + domains: []identity.Domain{}, + expectedCounts: map[string]kvmProjectInstanceCount{ + // The domain_id is extracted from the project record, so it should be "domain-unknown" even though there is no matching domain entry + "project-1|node001-bb01|flavor-1|az1": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-unknown", 
DomainName: "", ComputeHost: "node001-bb01", FlavorName: "flavor-1", AvailabilityZone: "az1", InstanceCount: 1}, + }, + }, + { + name: "missing project entry results in empty project_name and domain", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{}, + domains: []identity.Domain{}, + expectedCounts: map[string]kvmProjectInstanceCount{ + "project-1|node001-bb01|flavor-1|az1": {ProjectID: "project-1", ProjectName: "", DomainID: "", DomainName: "", ComputeHost: "node001-bb01", FlavorName: "flavor-1", AvailabilityZone: "az1", InstanceCount: 1}, + }, + }, + { + name: "no instances returns empty result", + servers: []nova.Server{}, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + expectedCounts: map[string]kvmProjectInstanceCount{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + dbEnv := testlibDB.SetupDBEnv(t) + testDB := db.DB{DbMap: dbEnv.DbMap} + defer dbEnv.Close() + + if err := testDB.CreateTable( + testDB.AddTable(nova.Server{}), + testDB.AddTable(identity.Project{}), + testDB.AddTable(identity.Domain{}), + ); err != nil { + t.Fatalf("failed to create tables: %v", err) + } + + var mockData []any + for i := range tt.servers { + mockData = append(mockData, &tt.servers[i]) + } + for i := range tt.projects { + mockData = append(mockData, &tt.projects[i]) + } + for i := range tt.domains { + mockData = append(mockData, &tt.domains[i]) + } + if len(mockData) > 0 { + if err := testDB.Insert(mockData...); err != nil { + t.Fatalf("expected no error, got %v", err) + } + } + + client := buildKVMHypervisorClient(t, []hv1.Hypervisor{}) + kpi := &KVMProjectUtilizationKPI{} + if err := kpi.Init(&testDB, client.Build(), conf.NewRawOpts("{}")); err != nil { + 
t.Fatalf("expected no error on Init, got %v", err) + } + counts, err := kpi.queryProjectInstanceCount() + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if len(counts) != len(tt.expectedCounts) { + t.Fatalf("expected %d counts, got %d", len(tt.expectedCounts), len(counts)) + } + for _, got := range counts { + key := got.ProjectID + "|" + got.ComputeHost + "|" + got.FlavorName + "|" + got.AvailabilityZone + exp, ok := tt.expectedCounts[key] + if !ok { + t.Errorf("unexpected count for key %q: %+v", key, got) + continue + } + if got != exp { + t.Errorf("count mismatch for key %q: expected %+v, got %+v", key, exp, got) + } + } + }) + } +} + +func TestKVMProjectUtilizationKPI_queryProjectCapacityUsage(t *testing.T) { + tests := []struct { + name string + servers []nova.Server + projects []identity.Project + domains []identity.Domain + flavors []nova.Flavor + expectedUsages map[string]kvmProjectCapacityUsage + }{ + { + name: "single instance with flavor details", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + expectedUsages: map[string]kvmProjectCapacityUsage{ + "project-1|node001-bb01|az1": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node001-bb01", AvailabilityZone: "az1", TotalVCPUs: 2, TotalRAMMB: 4096, TotalDiskGB: 1}, + }, + }, + { + name: "multiple instances with different flavors and projects", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "server-2", TenantID: 
"project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-2", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "server-3", TenantID: "project-2", OSEXTSRVATTRHost: "node002-bb02", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az2"}, + }, + projects: []identity.Project{ + {ID: "project-1", Name: "Project One", DomainID: "domain-1"}, + {ID: "project-2", Name: "Project Two", DomainID: "domain-1"}, + }, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{ + {ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}, + {ID: "f2", Name: "flavor-2", VCPUs: 4, RAM: 8192, Disk: 2}, + }, + expectedUsages: map[string]kvmProjectCapacityUsage{ + "project-1|node001-bb01|az1": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node001-bb01", AvailabilityZone: "az1", TotalVCPUs: 6, TotalRAMMB: 12288, TotalDiskGB: 3}, + "project-2|node002-bb02|az2": {ProjectID: "project-2", ProjectName: "Project Two", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node002-bb02", AvailabilityZone: "az2", TotalVCPUs: 2, TotalRAMMB: 4096, TotalDiskGB: 1}, + }, + }, + { + name: "missing flavor entry results in zero capacity", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-missing", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + expectedUsages: map[string]kvmProjectCapacityUsage{ + "project-1|node001-bb01|az1": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node001-bb01", AvailabilityZone: "az1", TotalVCPUs: 0, TotalRAMMB: 0, TotalDiskGB: 0}, + }, + }, + { + 
name: "instances on non-KVM hosts are excluded", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "nova-compute-1", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + expectedUsages: map[string]kvmProjectCapacityUsage{}, + }, + { + name: "instances with non-ACTIVE status are excluded", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "DELETED", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + expectedUsages: map[string]kvmProjectCapacityUsage{}, + }, + { + name: "no instances returns empty capacity usage", + servers: []nova.Server{}, + projects: []identity.Project{ + {ID: "project-1", Name: "Project One", DomainID: "domain-1"}, + }, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + expectedUsages: map[string]kvmProjectCapacityUsage{}, + }, + { + name: "multiple instances with same flavor aggregate capacity correctly", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "server-2", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + 
domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + expectedUsages: map[string]kvmProjectCapacityUsage{ + "project-1|node001-bb01|az1": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-1", DomainName: "Domain One", ComputeHost: "node001-bb01", AvailabilityZone: "az1", TotalVCPUs: 4, TotalRAMMB: 8192, TotalDiskGB: 2}, + }, + }, + { + name: "project references non-existent domain results in empty domain fields", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-unknown"}}, + domains: []identity.Domain{}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + expectedUsages: map[string]kvmProjectCapacityUsage{ + // The domain_id is extracted from the project record, so it should be "domain-unknown" even though there is no matching domain entry + "project-1|node001-bb01|az1": {ProjectID: "project-1", ProjectName: "Project One", DomainID: "domain-unknown", DomainName: "", ComputeHost: "node001-bb01", AvailabilityZone: "az1", TotalVCPUs: 2, TotalRAMMB: 4096, TotalDiskGB: 1}, + }, + }, + { + name: "missing project entry results in empty project_name and domain", + servers: []nova.Server{ + {ID: "server-1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{}, + domains: []identity.Domain{}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + expectedUsages: map[string]kvmProjectCapacityUsage{ + "project-1|node001-bb01|az1": {ProjectID: "project-1", ProjectName: "", DomainID: "", DomainName: "", ComputeHost: "node001-bb01", AvailabilityZone: "az1", 
TotalVCPUs: 2, TotalRAMMB: 4096, TotalDiskGB: 1}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + dbEnv := testlibDB.SetupDBEnv(t) + testDB := db.DB{DbMap: dbEnv.DbMap} + defer dbEnv.Close() + + if err := testDB.CreateTable( + testDB.AddTable(nova.Server{}), + testDB.AddTable(identity.Project{}), + testDB.AddTable(identity.Domain{}), + testDB.AddTable(nova.Flavor{}), + ); err != nil { + t.Fatalf("failed to create tables: %v", err) + } + + var mockData []any + for i := range tt.servers { + mockData = append(mockData, &tt.servers[i]) + } + for i := range tt.projects { + mockData = append(mockData, &tt.projects[i]) + } + for i := range tt.domains { + mockData = append(mockData, &tt.domains[i]) + } + for i := range tt.flavors { + mockData = append(mockData, &tt.flavors[i]) + } + if len(mockData) > 0 { + if err := testDB.Insert(mockData...); err != nil { + t.Fatalf("expected no error, got %v", err) + } + } + + client := buildKVMHypervisorClient(t, []hv1.Hypervisor{}) + kpi := &KVMProjectUtilizationKPI{} + if err := kpi.Init(&testDB, client.Build(), conf.NewRawOpts("{}")); err != nil { + t.Fatalf("expected no error on Init, got %v", err) + } + usages, err := kpi.queryProjectCapacityUsage() + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if len(usages) != len(tt.expectedUsages) { + t.Fatalf("expected %d usages, got %d", len(tt.expectedUsages), len(usages)) + } + for _, got := range usages { + key := got.ProjectID + "|" + got.ComputeHost + "|" + got.AvailabilityZone + exp, ok := tt.expectedUsages[key] + if !ok { + t.Errorf("unexpected usage for key %q: %+v", key, got) + continue + } + if got != exp { + t.Errorf("usage mismatch for key %q: expected %+v, got %+v", key, exp, got) + } + } + }) + } +} + +func TestKVMProjectUtilizationKPI_Collect(t *testing.T) { + tests := []struct { + name string + servers []nova.Server + projects []identity.Project + domains []identity.Domain + flavors []nova.Flavor + hypervisors 
[]hv1.Hypervisor + expectedMetrics []collectedKVMMetric + }{ + { + name: "single instance in one project", + servers: []nova.Server{ + {ID: "s1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + hypervisors: []hv1.Hypervisor{ + {ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01", Labels: map[string]string{"topology.kubernetes.io/zone": "az1"}}}, + }, + expectedMetrics: []collectedKVMMetric{ + kvmInstanceMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "flavor-1", 1), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "vcpu", 2), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "memory", 4096*1024*1024), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "disk", 1*1024*1024*1024), + }, + }, + { + name: "multiple instances across hosts, projects, and flavors", + servers: []nova.Server{ + {ID: "s1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "s2", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-2", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "s3", TenantID: "project-2", OSEXTSRVATTRHost: "node002-bb02", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az2"}, + }, + projects: []identity.Project{ + {ID: "project-1", Name: "Project One", DomainID: "domain-1"}, + {ID: "project-2", Name: "Project Two", DomainID: "domain-1"}, + }, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{ 
+ {ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}, + {ID: "f2", Name: "flavor-2", VCPUs: 4, RAM: 8192, Disk: 2}, + }, + hypervisors: []hv1.Hypervisor{ + {ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01", Labels: map[string]string{"topology.kubernetes.io/zone": "az1"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node002-bb02", Labels: map[string]string{"topology.kubernetes.io/zone": "az2"}}}, + }, + expectedMetrics: []collectedKVMMetric{ + kvmInstanceMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "flavor-1", 1), + kvmInstanceMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "flavor-2", 1), + kvmInstanceMetric("node002-bb02", "az2", "project-2", "Project Two", "domain-1", "Domain One", "flavor-1", 1), + // node001-bb01/project-1: 1*flavor-1 + 1*flavor-2 + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "vcpu", 6), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "memory", 12288*1024*1024), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "disk", 3*1024*1024*1024), + // node002-bb02/project-2: 1*flavor-1 + kvmCapacityMetric("node002-bb02", "az2", "project-2", "Project Two", "domain-1", "Domain One", "vcpu", 2), + kvmCapacityMetric("node002-bb02", "az2", "project-2", "Project Two", "domain-1", "Domain One", "memory", 4096*1024*1024), + kvmCapacityMetric("node002-bb02", "az2", "project-2", "Project Two", "domain-1", "Domain One", "disk", 1*1024*1024*1024), + }, + }, + { + name: "non-KVM hosts are excluded from metrics", + servers: []nova.Server{ + {ID: "s1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "s2", TenantID: "project-1", OSEXTSRVATTRHost: "nova-compute-1", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: 
[]identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + hypervisors: []hv1.Hypervisor{ + {ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01", Labels: map[string]string{"topology.kubernetes.io/zone": "az1"}}}, + }, + expectedMetrics: []collectedKVMMetric{ + kvmInstanceMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "flavor-1", 1), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "vcpu", 2), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "memory", 4096*1024*1024), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "disk", 1*1024*1024*1024), + }, + }, + { + name: "DELETED and ERROR instances are excluded", + servers: []nova.Server{ + {ID: "s1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "DELETED", OSEXTAvailabilityZone: "az1"}, + {ID: "s2", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-2", Status: "ERROR", OSEXTAvailabilityZone: "az1"}, + {ID: "s3", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-3", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{ + {ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}, + {ID: "f2", Name: "flavor-2", VCPUs: 4, RAM: 8192, Disk: 2}, + {ID: "f3", Name: "flavor-3", VCPUs: 8, RAM: 16384, Disk: 4}, + }, + hypervisors: []hv1.Hypervisor{ + {ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01", Labels: map[string]string{"topology.kubernetes.io/zone": "az1"}}}, + }, + expectedMetrics: 
[]collectedKVMMetric{ + kvmInstanceMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "flavor-3", 1), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "vcpu", 8), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "memory", 16384*1024*1024), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "disk", 4*1024*1024*1024), + }, + }, + { + name: "multiple instances with same flavor are aggregated correctly", + servers: []nova.Server{ + {ID: "s1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "s2", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + {ID: "s3", TenantID: "project-1", OSEXTSRVATTRHost: "node002-bb02", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az2"}, + {ID: "s4", TenantID: "project-1", OSEXTSRVATTRHost: "node002-bb02", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az2"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + hypervisors: []hv1.Hypervisor{ + {ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01", Labels: map[string]string{"topology.kubernetes.io/zone": "az1"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node002-bb02", Labels: map[string]string{"topology.kubernetes.io/zone": "az2"}}}, + }, + expectedMetrics: []collectedKVMMetric{ + kvmInstanceMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "flavor-1", 2), + kvmInstanceMetric("node002-bb02", "az2", "project-1", "Project One", "domain-1", "Domain One", "flavor-1", 2), + 
kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "vcpu", 4), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "memory", 2*4096*1024*1024), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "disk", 2*1024*1024*1024), + kvmCapacityMetric("node002-bb02", "az2", "project-1", "Project One", "domain-1", "Domain One", "vcpu", 4), + kvmCapacityMetric("node002-bb02", "az2", "project-1", "Project One", "domain-1", "Domain One", "memory", 2*4096*1024*1024), + kvmCapacityMetric("node002-bb02", "az2", "project-1", "Project One", "domain-1", "Domain One", "disk", 2*1024*1024*1024), + }, + }, + { + name: "compute host not in hypervisor list produces no metrics", + servers: []nova.Server{ + {ID: "s1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + hypervisors: []hv1.Hypervisor{}, + expectedMetrics: []collectedKVMMetric{}, + }, + { + name: "project references non-existent domain results in empty domain labels", + servers: []nova.Server{ + {ID: "s1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-unknown"}}, + domains: []identity.Domain{}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + hypervisors: []hv1.Hypervisor{ + {ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01", Labels: map[string]string{"topology.kubernetes.io/zone": "az1"}}}, + }, + expectedMetrics: []collectedKVMMetric{ + // The domain_id 
is extracted from the project record, so it should be "domain-unknown" even though there is no matching domain entry + kvmInstanceMetric("node001-bb01", "az1", "project-1", "Project One", "domain-unknown", "", "flavor-1", 1), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-unknown", "", "vcpu", 2), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-unknown", "", "memory", 4096*1024*1024), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-unknown", "", "disk", 1*1024*1024*1024), + }, + }, + { + name: "missing project entry results in empty project_name and domain labels", + servers: []nova.Server{ + {ID: "s1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-1", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{}, + domains: []identity.Domain{}, + flavors: []nova.Flavor{{ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}}, + hypervisors: []hv1.Hypervisor{ + {ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01", Labels: map[string]string{"topology.kubernetes.io/zone": "az1"}}}, + }, + expectedMetrics: []collectedKVMMetric{ + kvmInstanceMetric("node001-bb01", "az1", "project-1", "", "", "", "flavor-1", 1), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "", "", "", "vcpu", 2), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "", "", "", "memory", 4096*1024*1024), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "", "", "", "disk", 1*1024*1024*1024), + }, + }, + { + name: "missing flavor entry results in zero capacity", + servers: []nova.Server{ + {ID: "s1", TenantID: "project-1", OSEXTSRVATTRHost: "node001-bb01", FlavorName: "flavor-missing", Status: "ACTIVE", OSEXTAvailabilityZone: "az1"}, + }, + projects: []identity.Project{{ID: "project-1", Name: "Project One", DomainID: "domain-1"}}, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{}, + 
hypervisors: []hv1.Hypervisor{ + {ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01", Labels: map[string]string{"topology.kubernetes.io/zone": "az1"}}}, + }, + expectedMetrics: []collectedKVMMetric{ + kvmInstanceMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "flavor-missing", 1), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "vcpu", 0), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "memory", 0), + kvmCapacityMetric("node001-bb01", "az1", "project-1", "Project One", "domain-1", "Domain One", "disk", 0), + }, + }, + { + name: "no instances produces no metrics", + servers: []nova.Server{}, + projects: []identity.Project{ + {ID: "project-1", Name: "Project One", DomainID: "domain-1"}, + }, + domains: []identity.Domain{{ID: "domain-1", Name: "Domain One"}}, + flavors: []nova.Flavor{ + {ID: "f1", Name: "flavor-1", VCPUs: 2, RAM: 4096, Disk: 1}, + }, + hypervisors: []hv1.Hypervisor{ + {ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01", Labels: map[string]string{"topology.kubernetes.io/zone": "az1"}}}, + }, + expectedMetrics: []collectedKVMMetric{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + dbEnv := testlibDB.SetupDBEnv(t) + testDB := db.DB{DbMap: dbEnv.DbMap} + defer dbEnv.Close() + + if err := testDB.CreateTable( + testDB.AddTable(nova.Server{}), + testDB.AddTable(identity.Project{}), + testDB.AddTable(identity.Domain{}), + testDB.AddTable(nova.Flavor{}), + ); err != nil { + t.Fatalf("failed to create tables: %v", err) + } + + var mockData []any + for i := range tt.servers { + mockData = append(mockData, &tt.servers[i]) + } + for i := range tt.projects { + mockData = append(mockData, &tt.projects[i]) + } + for i := range tt.domains { + mockData = append(mockData, &tt.domains[i]) + } + for i := range tt.flavors { + mockData = append(mockData, &tt.flavors[i]) + } + if len(mockData) > 0 { + if err 
:= testDB.Insert(mockData...); err != nil { + t.Fatalf("expected no error inserting data, got %v", err) + } + } + + client := buildKVMHypervisorClient(t, tt.hypervisors) + kpi := &KVMProjectUtilizationKPI{} + if err := kpi.Init(&testDB, client.Build(), conf.NewRawOpts("{}")); err != nil { + t.Fatalf("expected no error on Init, got %v", err) + } + + ch := make(chan prometheus.Metric, 100) + kpi.Collect(ch) + close(ch) + + actual := make(map[string]collectedKVMMetric) + for m := range ch { + var pm prometheusgo.Metric + if err := m.Write(&pm); err != nil { + t.Fatalf("failed to write metric: %v", err) + } + labels := make(map[string]string) + for _, lbl := range pm.Label { + labels[lbl.GetName()] = lbl.GetValue() + } + name := getMetricName(m.Desc().String()) + key := buildKVMMetricKey(name, labels) + if _, exists := actual[key]; exists { + t.Fatalf("duplicate metric key %q", key) + } + actual[key] = collectedKVMMetric{Name: name, Labels: labels, Value: pm.GetGauge().GetValue()} + } + + if len(actual) != len(tt.expectedMetrics) { + t.Errorf("expected %d metrics, got %d: actual=%v", len(tt.expectedMetrics), len(actual), actual) + } + for _, exp := range tt.expectedMetrics { + key := buildKVMMetricKey(exp.Name, exp.Labels) + got, ok := actual[key] + if !ok { + t.Errorf("missing metric %q", key) + continue + } + if got.Value != exp.Value { + t.Errorf("metric %q value: expected %v, got %v", key, exp.Value, got.Value) + } + if !reflect.DeepEqual(exp.Labels, got.Labels) { + t.Errorf("metric %q labels: expected %v, got %v", key, exp.Labels, got.Labels) + } + } + }) + } +} diff --git a/internal/knowledge/kpis/plugins/infrastructure/shared.go b/internal/knowledge/kpis/plugins/infrastructure/shared.go index 62eb44e9c..042fb648f 100644 --- a/internal/knowledge/kpis/plugins/infrastructure/shared.go +++ b/internal/knowledge/kpis/plugins/infrastructure/shared.go @@ -7,8 +7,10 @@ import ( "fmt" "regexp" "strconv" + "strings" 
"github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" ) const ( @@ -18,6 +20,7 @@ const ( hypervisorFamilyVMware = "vmware" vmwareComputeHostPattern = "nova-compute-%" vmwareIronicComputeHostPattern = "nova-compute-ironic-%" + kvmComputeHostPattern = "node%-bb%" ) // vmwareHost wraps HostDetails with Prometheus metric helpers. @@ -63,6 +66,71 @@ var vmwareHostLabels = []string{ "pinned_project_ids", } +var kvmHostLabels = []string{ + "compute_host", + "availability_zone", + "building_block", + "cpu_architecture", + "workload_type", + "enabled", + "decommissioned", + "external_customer", + "maintenance", +} + +type kvmHost struct { + hv1.Hypervisor +} + +func (h kvmHost) getHostLabels() []string { + decommissioned := false + externalCustomer := false + workloadType := "general-purpose" + cpuArchitecture := "cascade-lake" + + availabilityZone := h.Labels["topology.kubernetes.io/zone"] + if availabilityZone == "" { + availabilityZone = "unknown" + } + + buildingBlock := "unknown" + // Assuming hypervisor names are in the format nodeXXX-bbYY + parts := strings.Split(h.Name, "-") + if len(parts) > 1 { + buildingBlock = parts[1] + } + + for _, trait := range h.Status.Traits { + switch trait { + case "CUSTOM_HW_SAPPHIRE_RAPIDS": + cpuArchitecture = "sapphire-rapids" + case "CUSTOM_HANA_EXCLUSIVE_HOST": + workloadType = "hana" + case "CUSTOM_DECOMMISSIONING": + decommissioned = true + case "CUSTOM_EXTERNAL_CUSTOMER_EXCLUSIVE": + externalCustomer = true + } + } + + maintenance := false + if h.Spec.Maintenance != hv1.MaintenanceUnset { + maintenance = true + } + + return []string{ + h.Name, + availabilityZone, + buildingBlock, + cpuArchitecture, + workloadType, + strconv.FormatBool(true), + strconv.FormatBool(decommissioned), + strconv.FormatBool(externalCustomer), + strconv.FormatBool(maintenance), + } +} + var fqNameRe = regexp.MustCompile(`fqName: "([^"]+)"`) func 
getMetricName(desc string) string { diff --git a/internal/knowledge/kpis/plugins/infrastructure/shared_test.go b/internal/knowledge/kpis/plugins/infrastructure/shared_test.go index 351fedc50..c9315df08 100644 --- a/internal/knowledge/kpis/plugins/infrastructure/shared_test.go +++ b/internal/knowledge/kpis/plugins/infrastructure/shared_test.go @@ -7,6 +7,8 @@ import ( "testing" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func mockVMwareHostLabels(computeHost, az string) map[string]string { @@ -24,7 +26,7 @@ func mockVMwareHostLabels(computeHost, az string) map[string]string { } } -func TestVMwareHostGetHostLabels(t *testing.T) { +func TestVMwareHost_GetHostLabels(t *testing.T) { str := func(s string) *string { return &s } tests := []struct { @@ -92,6 +94,110 @@ func TestVMwareHostGetHostLabels(t *testing.T) { } } +func TestKVMHost_GetHostLabels(t *testing.T) { + tests := []struct { + name string + host kvmHost + want []string + }{ + { + name: "defaults with no traits and no labels", + host: kvmHost{hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01"}, + }}, + want: []string{"node001-bb01", "unknown", "bb01", "cascade-lake", "general-purpose", "true", "false", "false", "false"}, + }, + { + name: "availability zone from label", + host: kvmHost{hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node001-bb01", + Labels: map[string]string{"topology.kubernetes.io/zone": "az1"}, + }, + }}, + want: []string{"node001-bb01", "az1", "bb01", "cascade-lake", "general-purpose", "true", "false", "false", "false"}, + }, + { + name: "name without dash results in unknown building block", + host: kvmHost{hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{Name: "nodewithoutdash"}, + }}, + want: []string{"nodewithoutdash", "unknown", "unknown", "cascade-lake", "general-purpose", "true", "false", "false", 
"false"}, + }, + { + name: "sapphire rapids trait", + host: kvmHost{hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01"}, + Status: hv1.HypervisorStatus{Traits: []string{"CUSTOM_HW_SAPPHIRE_RAPIDS"}}, + }}, + want: []string{"node001-bb01", "unknown", "bb01", "sapphire-rapids", "general-purpose", "true", "false", "false", "false"}, + }, + { + name: "hana exclusive host trait", + host: kvmHost{hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01"}, + Status: hv1.HypervisorStatus{Traits: []string{"CUSTOM_HANA_EXCLUSIVE_HOST"}}, + }}, + want: []string{"node001-bb01", "unknown", "bb01", "cascade-lake", "hana", "true", "false", "false", "false"}, + }, + { + name: "decommissioning trait", + host: kvmHost{hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01"}, + Status: hv1.HypervisorStatus{Traits: []string{"CUSTOM_DECOMMISSIONING"}}, + }}, + want: []string{"node001-bb01", "unknown", "bb01", "cascade-lake", "general-purpose", "true", "true", "false", "false"}, + }, + { + name: "external customer exclusive trait", + host: kvmHost{hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01"}, + Status: hv1.HypervisorStatus{Traits: []string{"CUSTOM_EXTERNAL_CUSTOMER_EXCLUSIVE"}}, + }}, + want: []string{"node001-bb01", "unknown", "bb01", "cascade-lake", "general-purpose", "true", "false", "true", "false"}, + }, + { + name: "maintenance set", + host: kvmHost{hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{Name: "node001-bb01"}, + Spec: hv1.HypervisorSpec{Maintenance: hv1.MaintenanceManual}, + }}, + want: []string{"node001-bb01", "unknown", "bb01", "cascade-lake", "general-purpose", "true", "false", "false", "true"}, + }, + { + name: "all traits and maintenance set", + host: kvmHost{hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node001-bb42", + Labels: map[string]string{"topology.kubernetes.io/zone": "az3"}, + }, + Spec: hv1.HypervisorSpec{Maintenance: hv1.MaintenanceAuto}, + Status: hv1.HypervisorStatus{Traits: 
[]string{ + "CUSTOM_HW_SAPPHIRE_RAPIDS", + "CUSTOM_HANA_EXCLUSIVE_HOST", + "CUSTOM_DECOMMISSIONING", + "CUSTOM_EXTERNAL_CUSTOMER_EXCLUSIVE", + }}, + }}, + want: []string{"node001-bb42", "az3", "bb42", "sapphire-rapids", "hana", "true", "true", "true", "true"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.host.getHostLabels() + if len(got) != len(kvmHostLabels) { + t.Fatalf("getHostLabels() returned %d values, want %d (matching kvmHostLabels)", len(got), len(kvmHostLabels)) + } + for i, want := range tt.want { + if got[i] != want { + t.Errorf("label[%d] (%s) = %q, want %q", i, kvmHostLabels[i], got[i], want) + } + } + }) + } +} + func TestIsKVMFlavor(t *testing.T) { tests := []struct { flavor string diff --git a/internal/knowledge/kpis/plugins/infrastructure/vmware_project_utilization.go b/internal/knowledge/kpis/plugins/infrastructure/vmware_project_utilization.go index bf9fb995b..368ebd194 100644 --- a/internal/knowledge/kpis/plugins/infrastructure/vmware_project_utilization.go +++ b/internal/knowledge/kpis/plugins/infrastructure/vmware_project_utilization.go @@ -119,20 +119,9 @@ func (k *VMwareProjectUtilizationKPI) Collect(ch chan<- prometheus.Metric) { hostLabels := host.getHostLabels() hostLabels = append(hostLabels, projectCapacityUsage.ProjectID, projectCapacityUsage.ProjectName, projectCapacityUsage.DomainID, projectCapacityUsage.DomainName) - memoryUsageBytes, err := bytesFromUnit(projectCapacityUsage.TotalRAMMB, "MB") - if err != nil { - slog.Error("vmware_project_utilization: failed to convert memory to bytes", "err", err) - continue - } - diskUsageBytes, err := bytesFromUnit(projectCapacityUsage.TotalDiskGB, "GB") - if err != nil { - slog.Error("vmware_project_utilization: failed to convert disk to bytes", "err", err) - continue - } - ch <- prometheus.MustNewConstMetric(k.capacityUsagePerProjectAndHost, prometheus.GaugeValue, projectCapacityUsage.TotalVCPUs, append(hostLabels, "vcpu")...) 
- ch <- prometheus.MustNewConstMetric(k.capacityUsagePerProjectAndHost, prometheus.GaugeValue, memoryUsageBytes, append(hostLabels, "memory")...) - ch <- prometheus.MustNewConstMetric(k.capacityUsagePerProjectAndHost, prometheus.GaugeValue, diskUsageBytes, append(hostLabels, "disk")...) + ch <- prometheus.MustNewConstMetric(k.capacityUsagePerProjectAndHost, prometheus.GaugeValue, projectCapacityUsage.TotalRAMMB*1024*1024, append(hostLabels, "memory")...) + ch <- prometheus.MustNewConstMetric(k.capacityUsagePerProjectAndHost, prometheus.GaugeValue, projectCapacityUsage.TotalDiskGB*1024*1024*1024, append(hostLabels, "disk")...) } } From 91fb6c8ad6e312234660315cc1b380b1010eb560 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Wed, 6 May 2026 10:26:27 +0200 Subject: [PATCH 2/3] lint --- .../kpis/plugins/infrastructure/kvm_project_utilization.go | 1 - internal/knowledge/kpis/plugins/infrastructure/shared.go | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/internal/knowledge/kpis/plugins/infrastructure/kvm_project_utilization.go b/internal/knowledge/kpis/plugins/infrastructure/kvm_project_utilization.go index c4a3c623e..20f0ca32e 100644 --- a/internal/knowledge/kpis/plugins/infrastructure/kvm_project_utilization.go +++ b/internal/knowledge/kpis/plugins/infrastructure/kvm_project_utilization.go @@ -121,7 +121,6 @@ func (k *KVMProjectUtilizationKPI) Collect(ch chan<- prometheus.Metric) { ch <- prometheus.MustNewConstMetric(k.capacityUsagePerProjectAndHost, prometheus.GaugeValue, projectCapacityUsage.TotalRAMMB*1024*1024, append(hostLabels, "memory")...) ch <- prometheus.MustNewConstMetric(k.capacityUsagePerProjectAndHost, prometheus.GaugeValue, projectCapacityUsage.TotalDiskGB*1024*1024*1024, append(hostLabels, "disk")...) } - } // getKVMHosts retrieves the list of KVM hosts and their details from the database, returning a map keyed by compute host name. 
diff --git a/internal/knowledge/kpis/plugins/infrastructure/shared.go b/internal/knowledge/kpis/plugins/infrastructure/shared.go index 042fb648f..c50bd0271 100644 --- a/internal/knowledge/kpis/plugins/infrastructure/shared.go +++ b/internal/knowledge/kpis/plugins/infrastructure/shared.go @@ -113,10 +113,7 @@ func (h kvmHost) getHostLabels() []string { } } - maintenance := false - if h.Spec.Maintenance != hv1.MaintenanceUnset { - maintenance = true - } + maintenance := h.Spec.Maintenance != hv1.MaintenanceUnset return []string{ h.Name, From c0ea22d7774950f835870103d57e9899c51ca137 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Wed, 6 May 2026 10:28:49 +0200 Subject: [PATCH 3/3] feat: add kvm project utilization KPI support --- helm/bundles/cortex-nova/templates/kpis_kvm.yaml | 13 +++++++++++++ internal/knowledge/kpis/supported_kpis.go | 1 + 2 files changed, 14 insertions(+) diff --git a/helm/bundles/cortex-nova/templates/kpis_kvm.yaml b/helm/bundles/cortex-nova/templates/kpis_kvm.yaml index e98c0a447..48b9eb155 100644 --- a/helm/bundles/cortex-nova/templates/kpis_kvm.yaml +++ b/helm/bundles/cortex-nova/templates/kpis_kvm.yaml @@ -13,4 +13,17 @@ spec: - name: host-utilization description: | This KPI tracks the total, utilized, reserved and failover capacity of KVM hosts. 
+--- +apiVersion: cortex.cloud/v1alpha1 +kind: KPI +metadata: + name: kvm-project-utilization +spec: + schedulingDomain: nova + impl: kvm_project_utilization_kpi + dependencies: + datasources: + - name: nova-servers + - name: nova-flavors + - name: identity-projects {{- end }} \ No newline at end of file diff --git a/internal/knowledge/kpis/supported_kpis.go b/internal/knowledge/kpis/supported_kpis.go index 63a35866b..023454fb9 100644 --- a/internal/knowledge/kpis/supported_kpis.go +++ b/internal/knowledge/kpis/supported_kpis.go @@ -23,6 +23,7 @@ var supportedKPIs = map[string]plugins.KPI{ "vm_commitments_kpi": &compute.VMCommitmentsKPI{}, "vm_faults_kpi": &compute.VMFaultsKPI{}, + "kvm_project_utilization_kpi": &infrastructure.KVMProjectUtilizationKPI{}, "vmware_project_utilization_kpi": &infrastructure.VMwareProjectUtilizationKPI{}, "vmware_resource_commitments_kpi": &infrastructure.VMwareResourceCommitmentsKPI{}, "vmware_host_capacity_kpi": &infrastructure.VMwareHostCapacityKPI{},