From 9759b03e9f1da981140cb2b45dcb08e388769734 Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Wed, 22 Apr 2026 15:38:27 +0200 Subject: [PATCH 1/8] fix: add FindFlavorInGroups --- .../reservations/commitments/controller.go | 24 +++---------------- .../scheduling/reservations/flavor_groups.go | 14 +++++++++++ 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/internal/scheduling/reservations/commitments/controller.go b/internal/scheduling/reservations/commitments/controller.go index 3808dcf42..d5ecfb5e7 100644 --- a/internal/scheduling/reservations/commitments/controller.go +++ b/internal/scheduling/reservations/commitments/controller.go @@ -5,7 +5,6 @@ package commitments import ( "context" - "errors" "fmt" "time" @@ -25,7 +24,6 @@ import ( schedulerdelegationapi "github.com/cobaltcore-dev/cortex/api/external/nova" "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations" "github.com/cobaltcore-dev/cortex/pkg/multicluster" hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" @@ -195,25 +193,9 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr } // Search for the flavor across all flavor groups - // Also capture the flavor group name for pipeline selection - var flavorDetails *compute.FlavorInGroup - var flavorGroupName string - for groupName, fg := range flavorGroups { - for _, flavor := range fg.Flavors { - if flavor.Name == resourceName { - flavorDetails = &flavor - flavorGroupName = groupName - break - } - } - if flavorDetails != nil { - break - } - } - - // Check if flavor was found - if flavorDetails == nil { - logger.Error(errors.New("flavor not found"), "flavor not found in any flavor group", + flavorGroupName, flavorDetails, err := reservations.FindFlavorInGroups(resourceName, flavorGroups) + if err != nil { + logger.Error(err, "flavor not found in any flavor group", "resourceName", resourceName) return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil } diff --git a/internal/scheduling/reservations/flavor_groups.go b/internal/scheduling/reservations/flavor_groups.go index 197406eac..b6344630a 100644 --- a/internal/scheduling/reservations/flavor_groups.go +++ b/internal/scheduling/reservations/flavor_groups.go @@ -15,6 +15,20 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) +// FindFlavorInGroups searches all flavor groups for a flavor by name. +// Returns the flavor group name and flavor details, or an error if the flavor +// is not found in any group. +func FindFlavorInGroups(flavorName string, flavorGroups map[string]compute.FlavorGroupFeature) (groupName string, flavor *compute.FlavorInGroup, err error) { + for gName, fg := range flavorGroups { + for i, f := range fg.Flavors { + if f.Name == flavorName { + return gName, &fg.Flavors[i], nil + } + } + } + return "", nil, fmt.Errorf("flavor %q not found in any flavor group", flavorName) +} + // FlavorGroupKnowledgeClient accesses flavor group data from Knowledge CRDs. 
type FlavorGroupKnowledgeClient struct {
	client.Client

From e72c982df8d971a89c2fae1d7ecf986b89c02483 Mon Sep 17 00:00:00 2001
From: Malte Viering
Date: Wed, 22 Apr 2026 15:43:42 +0200
Subject: [PATCH 2/8] fix: add option to size failover reservations to the largest flavor in the group

---
 .../reservations/failover/config.go           |   9 +
 .../reservations/failover/controller.go       |  27 ++-
 .../reservations/failover/helpers.go          | 109 +++++++++--
 .../failover/reservation_scheduling.go        |  35 ++--
 .../failover/reservation_scheduling_test.go   | 174 +++++++++++++++++-
 5 files changed, 312 insertions(+), 42 deletions(-)

diff --git a/internal/scheduling/reservations/failover/config.go b/internal/scheduling/reservations/failover/config.go
index 79dc94480..b8bde49a6 100644
--- a/internal/scheduling/reservations/failover/config.go
+++ b/internal/scheduling/reservations/failover/config.go
@@ -77,6 +77,15 @@ type FailoverConfig struct {
 	// rotates to process different VMs. This ensures all VMs eventually get processed.
 	// Default: 4 (rotate every 4th reconcile cycle). Use 0 to disable rotation.
 	VMSelectionRotationInterval *int `json:"vmSelectionRotationInterval"`
+
+	// UseFlavorGroupResources, when true, sizes failover reservation resources based on
+	// the LargestFlavor in the VM's flavor group instead of the VM's actual resources.
+	// This enables better sharing: a single reservation can accommodate any flavor in the
+	// group since it's sized for the largest one.
+	// When false (or when the flavor group lookup fails), falls back to using the VM's
+	// own reported resources (memory + vcpus).
+	// Default: false
+	UseFlavorGroupResources bool `json:"useFlavorGroupResources"`
 }
 
 // intPtr returns a pointer to the given int value.
diff --git a/internal/scheduling/reservations/failover/controller.go b/internal/scheduling/reservations/failover/controller.go
index 297ed09cc..ebd709b27 100644
--- a/internal/scheduling/reservations/failover/controller.go
+++ b/internal/scheduling/reservations/failover/controller.go
@@ -13,6 +13,7 @@ import (
 	"time"
 
 	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute"
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
 	"github.com/cobaltcore-dev/cortex/pkg/multicluster"
 	hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
@@ -319,8 +320,22 @@ func (c *FailoverReservationController) ReconcilePeriodic(ctx context.Context) (
 	}
 	summary.reservationsDeleted = len(emptyReservationsToDelete)
 
-	// 6. Create and assign reservations for VMs that need them
-	assignSummary, hitMaxVMsLimit := c.reconcileCreateAndAssignReservations(ctx, vms, failoverReservations, allHypervisors)
+	// 6. Fetch flavor groups for reservation sizing (if configured)
+	var flavorGroups map[string]compute.FlavorGroupFeature
+	if c.Config.UseFlavorGroupResources {
+		knowledge := &reservations.FlavorGroupKnowledgeClient{Client: c.Client}
+		fg, err := knowledge.GetAllFlavorGroups(ctx, nil)
+		if err != nil {
+			logger.Info("flavor group knowledge not available, will fall back to VM resources for sizing",
+				"error", err)
+			// flavorGroups remains nil; resolveVMSpecForScheduling will fall back to VM resources
+		} else {
+			flavorGroups = fg
+		}
+	}
+
+	// 7. Create and assign reservations for VMs that need them
+	assignSummary, hitMaxVMsLimit := c.reconcileCreateAndAssignReservations(ctx, vms, failoverReservations, allHypervisors, flavorGroups)
 	summary.vmsMissingFailover = assignSummary.vmsMissingFailover
 	summary.vmsProcessed = assignSummary.vmsProcessed
 	summary.reservationsNeeded = assignSummary.reservationsNeeded
@@ -572,6 +587,7 @@ func (c *FailoverReservationController) reconcileCreateAndAssignReservations(
 	vms []VM,
 	failoverReservations []v1alpha1.Reservation,
 	allHypervisors []string,
+	flavorGroups map[string]compute.FlavorGroupFeature, // passed to resolveVMSpecForScheduling per VM
 ) (reconcileSummary, bool) {
 	logger := LoggerFromContext(ctx)
 
@@ -605,7 +621,10 @@ func (c *FailoverReservationController) reconcileCreateAndAssignReservations(
 		vmLogger.Info("processing VM for failover reservation")
 
 		for i := range need.Count {
-			reusedRes := c.tryReuseExistingReservation(vmCtx, need.VM, failoverReservations, allHypervisors)
+			// Resolve VM resources once per VM (may use LargestFlavor from flavor group)
+			resSpec := resolveVMSpecForScheduling(vmCtx, need.VM, c.Config.UseFlavorGroupResources, flavorGroups)
+
+			reusedRes := c.tryReuseExistingReservation(vmCtx, need.VM, failoverReservations, allHypervisors, resSpec)
 
 			if reusedRes != nil {
 				if err := c.patchReservationStatus(vmCtx, reusedRes); err != nil {
@@ -628,7 +647,7 @@ func (c *FailoverReservationController) reconcileCreateAndAssignReservations(
 				continue
 			}
 
-			newRes, err := c.scheduleAndBuildNewFailoverReservation(vmCtx, need.VM, allHypervisors, failoverReservations, excludeHypervisors)
+			newRes, err := c.scheduleAndBuildNewFailoverReservation(vmCtx, need.VM, allHypervisors, failoverReservations, excludeHypervisors, resSpec)
 			if err != nil {
 				vmLogger.V(1).Info("failed to schedule failover reservation", "error", err, "iteration", i+1, "needed", need.Count)
 				vmFailed++
diff --git a/internal/scheduling/reservations/failover/helpers.go b/internal/scheduling/reservations/failover/helpers.go
index 0ec75456c..3c1598316 100644
--- a/internal/scheduling/reservations/failover/helpers.go
+++ b/internal/scheduling/reservations/failover/helpers.go
@@ -8,11 +8,91 @@ import (
 	"fmt"
 
 	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute"
+	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
 	hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
+// resolvedReservationSpec holds the resolved resource spec for scheduling and reservation sizing.
+// When UseFlavorGroupResources is enabled and the VM's flavor is found in a group,
+// resources are sized to the LargestFlavor. Otherwise, they come from the VM directly.
+type resolvedReservationSpec struct {
+	FlavorName      string // may be overridden to LargestFlavor.Name
+	FlavorGroupName string // flavor group name if found, empty otherwise
+	MemoryMB        uint64
+	VCPUs           uint64
+}
+
+// ResourceGroup returns the flavor group name if available, otherwise falls back to the provided fallback value.
+func (r resolvedReservationSpec) ResourceGroup(fallback string) string {
+	if r.FlavorGroupName != "" {
+		return r.FlavorGroupName
+	}
+	return fallback
+}
+
+// HypervisorResources returns the reservation spec resources as a map suitable for the reservation CRD.
+// We use "cpu" (not "vcpus") as the canonical key because the scheduling capacity logic
+// (e.g., nova filter_has_enough_capacity) uses "cpu".
+func (r resolvedReservationSpec) HypervisorResources() map[hv1.ResourceName]resource.Quantity {
+	return map[hv1.ResourceName]resource.Quantity{
+		"memory": *resource.NewQuantity(int64(r.MemoryMB)*1024*1024, resource.BinarySI), //nolint:gosec // flavor memory from specs, realistically bounded
+		"cpu":    *resource.NewQuantity(int64(r.VCPUs), resource.DecimalSI),             //nolint:gosec // flavor vcpus from specs, realistically bounded
+	}
+}
+
+// resolveVMSpecForScheduling resolves the VM's resources for scheduling.
+// When useFlavorGroupResources is true and the flavor is found in a group,
+// returns the LargestFlavor's name and size. Otherwise falls back to VM resources.
+func resolveVMSpecForScheduling(
+	ctx context.Context,
+	vm VM,
+	useFlavorGroupResources bool,
+	flavorGroups map[string]compute.FlavorGroupFeature,
+) resolvedReservationSpec {
+
+	logger := LoggerFromContext(ctx)
+
+	if useFlavorGroupResources && flavorGroups != nil {
+		groupName, _, err := reservations.FindFlavorInGroups(vm.FlavorName, flavorGroups)
+		if err == nil {
+			fg := flavorGroups[groupName]
+			largest := fg.LargestFlavor
+			logger.V(1).Info("resolved VM resources from flavor group LargestFlavor",
+				"vmFlavor", vm.FlavorName,
+				"flavorGroup", groupName,
+				"largestFlavor", largest.Name,
+				"memoryMB", largest.MemoryMB,
+				"vcpus", largest.VCPUs)
+			return resolvedReservationSpec{
+				FlavorName:      largest.Name,
+				FlavorGroupName: groupName,
+				MemoryMB:        largest.MemoryMB,
+				VCPUs:           largest.VCPUs,
+			}
+		}
+		logger.Info("flavor group lookup failed, falling back to VM resources",
+			"vmFlavor", vm.FlavorName,
+			"error", err)
+	}
+
+	// Fallback: use VM's own resources
+	var memoryMB, vcpus uint64
+	if memory, ok := vm.Resources["memory"]; ok {
+		memoryMB = uint64(memory.Value() / (1024 * 1024)) //nolint:gosec // memory values won't overflow
+	}
+	if v, ok := vm.Resources["vcpus"]; ok {
+		vcpus = uint64(v.Value()) //nolint:gosec // vcpus values won't overflow
+	}
+	return resolvedReservationSpec{
+		FlavorName: vm.FlavorName,
+		MemoryMB:   memoryMB,
+		VCPUs:      vcpus,
+	}
+}
+
 // getFailoverAllocations safely returns the allocations map from a failover reservation.
 // Returns an empty map if the reservation has no failover status or allocations.
 func getFailoverAllocations(res *v1alpha1.Reservation) map[string]string {
@@ -90,23 +170,20 @@ func ValidateFailoverReservationResources(res *v1alpha1.Reservation) error {
 // newFailoverReservation creates a new failover reservation for a VM on a specific hypervisor.
 // This does NOT persist the reservation to the cluster - it only creates the in-memory object.
 // The caller is responsible for persisting the reservation.
-func newFailoverReservation(ctx context.Context, vm VM, hypervisor, creator string) *v1alpha1.Reservation {
-	logger := LoggerFromContext(ctx)
+//
+// The resolved parameter contains the pre-computed resources (from resolveVMSpecForScheduling),
+// which may come from the VM's flavor group LargestFlavor or from the VM's own resources.
+// This ensures the same sizing is used for both the scheduler query and the reservation CRD.
+func newFailoverReservation( + ctx context.Context, + vm VM, + hypervisor, creator string, + resolved resolvedReservationSpec, +) *v1alpha1.Reservation { - // Build resources from VM's Resources map - // The VM struct uses "vcpus" and "memory" keys (see vm_source.go) - // We convert "vcpus" to "cpu" for the reservation because the scheduling capacity logic - // (e.g., nova filter_has_enough_capacity) uses "cpu" as the canonical key. + logger := LoggerFromContext(ctx) - // TODO we may want to use different resource (bigger) to enable better sharing - resources := make(map[hv1.ResourceName]resource.Quantity) - if memory, ok := vm.Resources["memory"]; ok { - resources["memory"] = memory - } - if vcpus, ok := vm.Resources["vcpus"]; ok { - // todo check if that is correct, i.e. that the cpu reported on e.g. hypervisors is vcpu and not pcpu - resources["cpu"] = vcpus - } + resources := resSpec.HypervisorResources() reservation := &v1alpha1.Reservation{ ObjectMeta: metav1.ObjectMeta{ @@ -123,7 +200,7 @@ func newFailoverReservation(ctx context.Context, vm VM, hypervisor, creator stri Resources: resources, TargetHost: hypervisor, // Set the desired hypervisor from scheduler response FailoverReservation: &v1alpha1.FailoverReservationSpec{ - ResourceGroup: vm.FlavorName, + ResourceGroup: resSpec.ResourceGroup(vm.FlavorName), }, }, } diff --git a/internal/scheduling/reservations/failover/reservation_scheduling.go b/internal/scheduling/reservations/failover/reservation_scheduling.go index 6859d2cc4..8171ae3c8 100644 --- a/internal/scheduling/reservations/failover/reservation_scheduling.go +++ b/internal/scheduling/reservations/failover/reservation_scheduling.go @@ -30,7 +30,7 @@ const ( PipelineAcknowledgeFailoverReservation = "kvm-acknowledge-failover-reservation" ) -func (c *FailoverReservationController) queryHypervisorsFromScheduler(ctx context.Context, vm VM, allHypervisors []string, pipeline string) ([]string, error) { +func (c *FailoverReservationController) queryHypervisorsFromScheduler(ctx context.Context, vm VM, allHypervisors []string, pipeline string, resolved resolvedReservationSpec) ([]string, error) { logger := LoggerFromContext(ctx) // Build list of eligible hypervisors (excluding VM's current hypervisor) @@ -52,18 +52,6 @@ func (c *FailoverReservationController) queryHypervisorsFromScheduler(ctx contex ignoreHypervisors := []string{vm.CurrentHypervisor} - // Get memory and vcpus from VM resources - // The VM struct uses "vcpus" and "memory" keys (see vm_source.go) - var memoryMB uint64 - var vcpus uint64 - if memory, ok := vm.Resources["memory"]; ok { - // Convert from bytes to MB - memoryMB = uint64(memory.Value() / (1024 * 1024)) //nolint:gosec // memory values won't overflow - } - if vcpusRes, ok := vm.Resources["vcpus"]; ok { - vcpus = uint64(vcpusRes.Value()) //nolint:gosec // vcpus values won't overflow - } - // Build flavor extra specs from VM's extra specs // Start with the VM's actual extra specs, then ensure required defaults are set flavorExtraSpecs := make(map[string]string) @@ -78,13 +66,15 @@ func (c *FailoverReservationController) queryHypervisorsFromScheduler(ctx contex // Schedule the reservation using the SchedulerClient. // Note: We pass all hypervisors (from all AZs) in EligibleHosts. The scheduler pipeline's // filter_correct_az filter will exclude hosts that are not in the VM's availability zone. 
+ // Use resSpec.FlavorName and reservation spec resources so the scheduler checks capacity for the + // correct flavor size (which may be the LargestFlavor from the flavor group). scheduleReq := reservations.ScheduleReservationRequest{ InstanceUUID: vm.UUID, ProjectID: vm.ProjectID, - FlavorName: vm.FlavorName, + FlavorName: resSpec.FlavorName, FlavorExtraSpecs: flavorExtraSpecs, - MemoryMB: memoryMB, - VCPUs: vcpus, + MemoryMB: resSpec.MemoryMB, + VCPUs: resSpec.VCPUs, EligibleHosts: eligibleHypervisors, IgnoreHosts: ignoreHypervisors, Pipeline: pipeline, @@ -123,11 +113,12 @@ func (c *FailoverReservationController) tryReuseExistingReservation( vm VM, failoverReservations []v1alpha1.Reservation, allHypervisors []string, + resolved resolvedReservationSpec, ) *v1alpha1.Reservation { logger := LoggerFromContext(ctx) - validHypervisors, err := c.queryHypervisorsFromScheduler(ctx, vm, allHypervisors, PipelineReuseFailoverReservation) + validHypervisors, err := c.queryHypervisorsFromScheduler(ctx, vm, allHypervisors, PipelineReuseFailoverReservation, resSpec) if err != nil { logger.Error(err, "failed to get potential hypervisors for VM", "vmUUID", vm.UUID) return nil @@ -263,12 +254,14 @@ func (c *FailoverReservationController) scheduleAndBuildNewFailoverReservation( allHypervisors []string, failoverReservations []v1alpha1.Reservation, excludeHypervisors map[string]bool, + resolved resolvedReservationSpec, ) (*v1alpha1.Reservation, error) { logger := LoggerFromContext(ctx) - // Get potential hypervisors from scheduler - validHypervisors, err := c.queryHypervisorsFromScheduler(ctx, vm, allHypervisors, PipelineNewFailoverReservation) + // Get potential hypervisors from scheduler using the reservation spec resources + // (which may be sized to the LargestFlavor from the flavor group) + validHypervisors, err := c.queryHypervisorsFromScheduler(ctx, vm, allHypervisors, PipelineNewFailoverReservation, resSpec) if err != nil { return nil, fmt.Errorf("failed to get potential hypervisors for VM: %w", err) } @@ -307,8 +300,8 @@ func (c *FailoverReservationController) scheduleAndBuildNewFailoverReservation( "selectedHypervisor", selectedHypervisor, "allReturnedHypervisors", validHypervisors) - // Build the failover reservation on the selected hypervisor (in-memory only) - reservation := newFailoverReservation(ctx, vm, selectedHypervisor, c.Config.Creator) + // Build the failover reservation using the same reservation spec resources + reservation := newFailoverReservation(ctx, vm, selectedHypervisor, c.Config.Creator, resSpec) return reservation, nil } diff --git a/internal/scheduling/reservations/failover/reservation_scheduling_test.go b/internal/scheduling/reservations/failover/reservation_scheduling_test.go index 0ae69f8db..7ea8bcb51 100644 --- a/internal/scheduling/reservations/failover/reservation_scheduling_test.go +++ b/internal/scheduling/reservations/failover/reservation_scheduling_test.go @@ -8,6 +8,7 @@ import ( "testing" "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -138,7 +139,9 @@ func TestBuildNewFailoverReservation(t *testing.T) { ctx := context.Background() creator := "test-creator" - result := newFailoverReservation(ctx, tt.vm, tt.hypervisor, creator) + // Resolve using VM's own resources (no flavor groups) + resolved := resolveVMSpecForScheduling(ctx, 
tt.vm, false, nil) + result := newFailoverReservation(ctx, tt.vm, tt.hypervisor, creator, resolved) // Verify Status.Host if result.Status.Host != tt.wantHost { @@ -221,6 +224,175 @@ func TestBuildNewFailoverReservation(t *testing.T) { } } +// ============================================================================ +// Test: resolveVMSpecForScheduling + newFailoverReservation with flavor group resources +// ============================================================================ + +func TestResolveVMForSchedulingAndNewFailoverReservation(t *testing.T) { + // Build a flavor group where the VM's flavor is "hana_c60_m960" (small) + // but the LargestFlavor is "hana_c120_m1920" (large). + // When UseFlavorGroupResources is true, the resolved resources should use + // the LargestFlavor's name and size. The reservation should then be sized accordingly. + flavorGroups := map[string]compute.FlavorGroupFeature{ + "hana_v2": { + Name: "hana_v2", + Flavors: []compute.FlavorInGroup{ + {Name: "hana_c120_m1920", VCPUs: 120, MemoryMB: 1966080}, + {Name: "hana_c60_m960", VCPUs: 60, MemoryMB: 983040}, + {Name: "hana_c30_m480", VCPUs: 30, MemoryMB: 491520}, + }, + LargestFlavor: compute.FlavorInGroup{Name: "hana_c120_m1920", VCPUs: 120, MemoryMB: 1966080}, + SmallestFlavor: compute.FlavorInGroup{Name: "hana_c30_m480", VCPUs: 30, MemoryMB: 491520}, + }, + } + + tests := []struct { + name string + vm VM + useFlavorGroupResources bool + flavorGroups map[string]compute.FlavorGroupFeature + wantFlavorName string + wantFlavorGroupName string + wantResourceGroup string + wantMemoryMB uint64 + wantVCPUs uint64 + }{ + { + name: "uses LargestFlavor resources when enabled and flavor found", + vm: VM{ + UUID: "vm-1", + CurrentHypervisor: "host1", + FlavorName: "hana_c60_m960", + ProjectID: "test-project", + Resources: map[string]resource.Quantity{ + "vcpus": *resource.NewQuantity(60, resource.DecimalSI), + "memory": *resource.NewQuantity(983040*1024*1024, resource.BinarySI), + }, + }, + useFlavorGroupResources: true, + flavorGroups: flavorGroups, + wantFlavorName: "hana_c120_m1920", // LargestFlavor name + wantFlavorGroupName: "hana_v2", // flavor group name + wantResourceGroup: "hana_v2", // ResourceGroup = flavor group name + wantMemoryMB: 1966080, // LargestFlavor memory + wantVCPUs: 120, // LargestFlavor vcpus + }, + { + name: "falls back to VM resources when disabled", + vm: VM{ + UUID: "vm-2", + CurrentHypervisor: "host1", + FlavorName: "hana_c60_m960", + ProjectID: "test-project", + Resources: map[string]resource.Quantity{ + "vcpus": *resource.NewQuantity(60, resource.DecimalSI), + "memory": *resource.NewQuantity(983040*1024*1024, resource.BinarySI), + }, + }, + useFlavorGroupResources: false, + flavorGroups: flavorGroups, + wantFlavorName: "hana_c60_m960", // VM's own flavor name + wantFlavorGroupName: "", // no flavor group (disabled) + wantResourceGroup: "hana_c60_m960", // ResourceGroup = fallback to flavor name + wantMemoryMB: 983040, // VM's own memory + wantVCPUs: 60, // VM's own vcpus + }, + { + name: "falls back to VM resources when flavor not in any group", + vm: VM{ + UUID: "vm-3", + CurrentHypervisor: "host1", + FlavorName: "unknown_flavor", + ProjectID: "test-project", + Resources: map[string]resource.Quantity{ + "vcpus": *resource.NewQuantity(8, resource.DecimalSI), + "memory": *resource.NewQuantity(16384*1024*1024, resource.BinarySI), + }, + }, + useFlavorGroupResources: true, + flavorGroups: flavorGroups, + wantFlavorName: "unknown_flavor", // VM's own flavor name (fallback) + 
wantFlavorGroupName: "", // no flavor group (not found) + wantResourceGroup: "unknown_flavor", // ResourceGroup = fallback to flavor name + wantMemoryMB: 16384, // VM's own memory (fallback) + wantVCPUs: 8, // VM's own vcpus (fallback) + }, + { + name: "falls back to VM resources when flavorGroups is nil", + vm: VM{ + UUID: "vm-4", + CurrentHypervisor: "host1", + FlavorName: "hana_c60_m960", + ProjectID: "test-project", + Resources: map[string]resource.Quantity{ + "vcpus": *resource.NewQuantity(60, resource.DecimalSI), + "memory": *resource.NewQuantity(983040*1024*1024, resource.BinarySI), + }, + }, + useFlavorGroupResources: true, + flavorGroups: nil, // nil flavor groups + wantFlavorName: "hana_c60_m960", // VM's own flavor name (fallback) + wantFlavorGroupName: "", // no flavor group (nil groups) + wantResourceGroup: "hana_c60_m960", // ResourceGroup = fallback to flavor name + wantMemoryMB: 983040, // VM's own memory (fallback) + wantVCPUs: 60, // VM's own vcpus (fallback) + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + creator := "test-creator" + + // Test resolveVMSpecForScheduling + resolved := resolveVMSpecForScheduling(ctx, tt.vm, tt.useFlavorGroupResources, tt.flavorGroups) + + if resolved.FlavorName != tt.wantFlavorName { + t.Errorf("resolved.FlavorName = %q, want %q", resolved.FlavorName, tt.wantFlavorName) + } + if resolved.MemoryMB != tt.wantMemoryMB { + t.Errorf("resolved.MemoryMB = %d, want %d", resolved.MemoryMB, tt.wantMemoryMB) + } + if resolved.VCPUs != tt.wantVCPUs { + t.Errorf("resolved.VCPUs = %d, want %d", resolved.VCPUs, tt.wantVCPUs) + } + if resolved.FlavorGroupName != tt.wantFlavorGroupName { + t.Errorf("resolved.FlavorGroupName = %q, want %q", resolved.FlavorGroupName, tt.wantFlavorGroupName) + } + + // Test that newFailoverReservation uses the resolved values correctly + result := newFailoverReservation(ctx, tt.vm, "target-host", creator, resolved) + + // Verify reservation memory matches resolved + resMemory, ok := result.Spec.Resources[hv1.ResourceMemory] + if !ok { + t.Fatal("reservation missing memory resource") + } + wantMemoryBytes := int64(tt.wantMemoryMB) * 1024 * 1024 //nolint:gosec // test values won't overflow + if resMemory.Value() != wantMemoryBytes { + t.Errorf("reservation memory = %d bytes, want %d bytes", resMemory.Value(), wantMemoryBytes) + } + + // Verify reservation CPU matches resolved + resCPU, ok := result.Spec.Resources[hv1.ResourceCPU] + if !ok { + t.Fatal("reservation missing cpu resource") + } + if resCPU.Value() != int64(tt.wantVCPUs) { //nolint:gosec // test values won't overflow + t.Errorf("reservation cpu = %d, want %d", resCPU.Value(), tt.wantVCPUs) + } + + // Verify ResourceGroup on the reservation + if result.Spec.FailoverReservation == nil { + t.Fatal("reservation missing FailoverReservation spec") + } + if result.Spec.FailoverReservation.ResourceGroup != tt.wantResourceGroup { + t.Errorf("ResourceGroup = %q, want %q", result.Spec.FailoverReservation.ResourceGroup, tt.wantResourceGroup) + } + }) + } +} + // ============================================================================ // Test Helpers (local to this test file) // ============================================================================ From 93f1a47ffbec910fd38c6d1e8bf6adc57b242dc3 Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Thu, 23 Apr 2026 10:34:25 +0200 Subject: [PATCH 3/8] Add IsReady for reservations --- api/v1alpha1/reservation_types.go | 6 ++++++ 
.../knowledge/kpis/plugins/compute/resource_capacity_kvm.go | 5 +---- .../nova/plugins/filters/filter_has_enough_capacity.go | 3 +-- .../nova/plugins/weighers/kvm_failover_evacuation.go | 4 +--- internal/scheduling/reservations/commitments/controller.go | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/api/v1alpha1/reservation_types.go b/api/v1alpha1/reservation_types.go index aee4a165d..131c0788b 100644 --- a/api/v1alpha1/reservation_types.go +++ b/api/v1alpha1/reservation_types.go @@ -5,6 +5,7 @@ package v1alpha1 import ( hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -248,6 +249,11 @@ type ReservationList struct { Items []Reservation `json:"items"` } +// IsReady returns true if the reservation has the Ready condition set to True. +func (r *Reservation) IsReady() bool { + return meta.IsStatusConditionTrue(r.Status.Conditions, ReservationConditionReady) +} + func init() { SchemeBuilder.Register(&Reservation{}, &ReservationList{}) } diff --git a/internal/knowledge/kpis/plugins/compute/resource_capacity_kvm.go b/internal/knowledge/kpis/plugins/compute/resource_capacity_kvm.go index 7d3bb33ef..4a4040fd5 100644 --- a/internal/knowledge/kpis/plugins/compute/resource_capacity_kvm.go +++ b/internal/knowledge/kpis/plugins/compute/resource_capacity_kvm.go @@ -9,9 +9,7 @@ import ( "strconv" "strings" - "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/cobaltcore-dev/cortex/api/v1alpha1" @@ -177,8 +175,7 @@ func aggregateReservationsByHost(reservations []v1alpha1.Reservation) ( continue } - readyCondition := meta.FindStatusCondition(reservation.Status.Conditions, v1alpha1.ReservationConditionReady) - if readyCondition == nil || readyCondition.Status != metav1.ConditionTrue { + if !reservation.IsReady() { continue } diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go index e6956609a..5b471f789 100644 --- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go @@ -13,7 +13,6 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" - "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" ) @@ -92,7 +91,7 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa return nil, err } for _, reservation := range reservations.Items { - if !meta.IsStatusConditionTrue(reservation.Status.Conditions, v1alpha1.ReservationConditionReady) { + if !reservation.IsReady() { continue // Only consider active reservations (Ready=True). 
}

diff --git a/internal/scheduling/nova/plugins/weighers/kvm_failover_evacuation.go b/internal/scheduling/nova/plugins/weighers/kvm_failover_evacuation.go
index 1f404f6b5..dcbcbf8bd 100644
--- a/internal/scheduling/nova/plugins/weighers/kvm_failover_evacuation.go
+++ b/internal/scheduling/nova/plugins/weighers/kvm_failover_evacuation.go
@@ -10,7 +10,6 @@ import (
 	api "github.com/cobaltcore-dev/cortex/api/external/nova"
 	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
-	"k8s.io/apimachinery/pkg/api/meta"
 )
 
 // Options for the KVM failover evacuation weigher.
@@ -72,8 +71,7 @@ func (s *KVMFailoverEvacuationStep) Run(traceLog *slog.Logger, request api.Exter
 	failoverHosts := make(map[string]bool)
 	for _, reservation := range reservations.Items {
 		// Only consider active failover reservations (Ready condition is True)
-		readyCondition := meta.FindStatusCondition(reservation.Status.Conditions, v1alpha1.ReservationConditionReady)
-		if readyCondition == nil || readyCondition.Status != "True" {
+		if !reservation.IsReady() {
 			continue
 		}
 		if reservation.Spec.Type != v1alpha1.ReservationTypeFailover {
diff --git a/internal/scheduling/reservations/commitments/controller.go b/internal/scheduling/reservations/commitments/controller.go
index d5ecfb5e7..bc00742cd 100644
--- a/internal/scheduling/reservations/commitments/controller.go
+++ b/internal/scheduling/reservations/commitments/controller.go
@@ -89,7 +89,7 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr
 		return ctrl.Result{}, nil // Don't need to requeue.
 	}
 
-	if meta.IsStatusConditionTrue(res.Status.Conditions, v1alpha1.ReservationConditionReady) {
+	if res.IsReady() {
 		logger.V(1).Info("reservation is active, verifying allocations")
 
 		// Verify all allocations in Spec against actual VM state

From 29efb2f673bc904a834aba0b4b78f2d862ae2cad Mon Sep 17 00:00:00 2001
From: Malte Viering
Date: Thu, 23 Apr 2026 11:11:23 +0200
Subject: [PATCH 4/8] feat: add kvm failover reservation consolidation weigher

---
 api/external/nova/messages.go                 |   5 +
 .../kvm_failover_reservation_consolidation.go | 149 +++++++++
 ...failover_reservation_consolidation_test.go | 296 ++++++++++++++++++
 .../reservations/failover/helpers.go          |   2 +-
 .../failover/reservation_scheduling.go        |  11 +-
 .../failover/reservation_scheduling_test.go   |  22 +-
 6 files changed, 469 insertions(+), 16 deletions(-)
 create mode 100644 internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation.go
 create mode 100644 internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation_test.go

diff --git a/api/external/nova/messages.go b/api/external/nova/messages.go
index a401df269..e82568941 100644
--- a/api/external/nova/messages.go
+++ b/api/external/nova/messages.go
@@ -151,6 +151,11 @@ const (
 	ReserveForFailoverIntent v1alpha1.SchedulingIntent = "reserve_for_failover"
 	// ReserveForCommittedResourceIntent indicates that the request is for CR reservation scheduling.
 	ReserveForCommittedResourceIntent v1alpha1.SchedulingIntent = "reserve_for_committed_resource"
+
+	// HintKeyResourceGroup is the scheduler hint key used to pass the resource group
+	// (e.g., flavor group name) for failover reservation scheduling.
+	// The weigher uses this to compare against existing reservations' ResourceGroup.
+	HintKeyResourceGroup = "_cortex_resource_group"
 )
 
 // GetIntent analyzes the request spec and determines the intent of the scheduling request.
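To make the hint contract concrete, here is a minimal sketch of the intended round trip; the producer side mirrors the SchedulerHints map built in reservation_scheduling.go later in this patch, the consumer side mirrors the weigher's GetSchedulerHintStr call below, and the surrounding variables (vm, resSpec, request) are assumed from their respective contexts:

	// Producer (failover controller): attach the resource group to the request.
	hints := map[string]any{
		"_nova_check_type":       string(api.ReserveForFailoverIntent),
		api.HintKeyResourceGroup: resSpec.ResourceGroup(vm.FlavorName),
	}

	// Consumer (consolidation weigher): read the hint back; a missing hint simply
	// disables the same-group penalty.
	group, _ := request.Spec.Data.GetSchedulerHintStr(api.HintKeyResourceGroup) //nolint:errcheck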
diff --git a/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation.go b/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation.go new file mode 100644 index 000000000..6bedb7d1c --- /dev/null +++ b/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation.go @@ -0,0 +1,149 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package weighers + +import ( + "context" + "log/slog" + + api "github.com/cobaltcore-dev/cortex/api/external/nova" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" +) + +// Options for the KVM failover reservation consolidation weigher. +type KVMFailoverReservationConsolidationOpts struct { + // Weight multiplier for the total failover reservation count per host (consolidation signal). + // Higher values more aggressively pack failover reservations onto fewer hosts. + // Default: 1.0 + TotalCountWeight *float64 `json:"totalCountWeight,omitempty"` + // Penalty multiplier for same-spec reservation count per host (diversity signal). + // Higher values more aggressively avoid clustering reservations of the same size on one host. + // Should be less than TotalCountWeight to ensure consolidation is the primary goal. + // Default: 0.1 + SameSpecPenalty *float64 `json:"sameSpecPenalty,omitempty"` +} + +func (o KVMFailoverReservationConsolidationOpts) Validate() error { + return nil +} + +func (o KVMFailoverReservationConsolidationOpts) GetTotalCountWeight() float64 { + if o.TotalCountWeight == nil { + return 1.0 + } + return *o.TotalCountWeight +} + +func (o KVMFailoverReservationConsolidationOpts) GetSameSpecPenalty() float64 { + if o.SameSpecPenalty == nil { + return 0.1 + } + return *o.SameSpecPenalty +} + +// KVMFailoverReservationConsolidationStep weighs hosts for failover reservation placement. +// It encourages consolidating failover reservations onto as few hosts as possible (primary goal), +// while preferring hosts with fewer reservations of the same ResourceGroup (secondary tiebreaker). +// +// The ResourceGroup is passed via the scheduler hint "_cortex_resource_group" and compared against +// each existing reservation's Spec.FailoverReservation.ResourceGroup. This groups reservations +// by flavor group (or individual flavor name when no group exists). +// +// Score formula (normalized by total reservation count T): +// +// score = (totalCountWeight / T) × hostCount - (sameSpecPenalty / T) × sameGroupCount +// +// This produces bounded output (~0 to 1) that plays nicely with other weighers. +type KVMFailoverReservationConsolidationStep struct { + lib.BaseWeigher[api.ExternalSchedulerRequest, KVMFailoverReservationConsolidationOpts] +} + +// Run the weigher step. +// For reserve_for_failover requests, hosts are scored based on existing failover reservation density +// and same-spec diversity. For all other request types, this weigher has no effect. +func (s *KVMFailoverReservationConsolidationStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) { + result := s.IncludeAllHostsFromRequest(request) + + intent, err := request.GetIntent() + if err != nil || intent != api.ReserveForFailoverIntent { + traceLog.Info("skipping failover reservation consolidation weigher for non-failover-reservation request") + return result, nil //nolint:nilerr // intentionally skip weigher on error + } + + // Extract the resource group from the scheduler hint. 
+ // This identifies which "spec group" the incoming reservation belongs to. + // If the hint is missing, requestResourceGroup will be empty and the same-group penalty is skipped. + requestResourceGroup, _ := request.Spec.Data.GetSchedulerHintStr(api.HintKeyResourceGroup) //nolint:errcheck // missing hint is fine, same-group penalty is simply skipped + + // Fetch all reservations. + var reservations v1alpha1.ReservationList + if err := s.Client.List(context.Background(), &reservations); err != nil { + return nil, err + } + + // Count failover reservations per host, and same-group reservations per host. + totalPerHost := make(map[string]float64) + sameGroupPerHost := make(map[string]float64) + totalReservations := 0 + + for _, reservation := range reservations.Items { + // Only consider active failover reservations (Ready condition is True). + if !reservation.IsReady() { + continue + } + if reservation.Spec.Type != v1alpha1.ReservationTypeFailover { + continue + } + + host := reservation.Status.Host + if host == "" { + continue + } + + totalReservations++ + totalPerHost[host]++ + + // Check if this reservation belongs to the same resource group as the request. + if requestResourceGroup != "" && reservation.Spec.FailoverReservation != nil && + reservation.Spec.FailoverReservation.ResourceGroup == requestResourceGroup { + sameGroupPerHost[host]++ + } + } + + // If there are no failover reservations, the weigher has no information to act on. + if totalReservations == 0 { + traceLog.Info("no active failover reservations found, skipping consolidation weigher") + return result, nil + } + + totalCountWeight := s.Options.GetTotalCountWeight() + sameSpecPenalty := s.Options.GetSameSpecPenalty() + t := float64(totalReservations) + + for _, host := range request.Hosts { + hostTotal := totalPerHost[host.ComputeHost] + hostSameGroup := sameGroupPerHost[host.ComputeHost] + + // Normalized score: bounded output for compatibility with other weighers. 
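+		// Worked example (numbers match the unit tests below): with totalCountWeight=1.0,
+		// sameSpecPenalty=0.1, and T=10 active failover reservations, a host holding
+		// 5 of them (3 in the request's resource group) scores
+		// (1.0/10)*5 - (0.1/10)*3 = 0.50 - 0.03 = 0.47, while an empty host scores 0.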
+ score := (totalCountWeight/t)*hostTotal - (sameSpecPenalty/t)*hostSameGroup + + result.Activations[host.ComputeHost] = score + traceLog.Info("calculated failover consolidation score for host", + "host", host.ComputeHost, + "totalOnHost", hostTotal, + "sameGroupOnHost", hostSameGroup, + "resourceGroup", requestResourceGroup, + "totalReservations", totalReservations, + "score", score) + } + + return result, nil +} + +func init() { + Index["kvm_failover_reservation_consolidation"] = func() NovaWeigher { + return &KVMFailoverReservationConsolidationStep{} + } +} diff --git a/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation_test.go b/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation_test.go new file mode 100644 index 000000000..e477858f2 --- /dev/null +++ b/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation_test.go @@ -0,0 +1,296 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package weighers + +import ( + "log/slog" + "math" + "testing" + + api "github.com/cobaltcore-dev/cortex/api/external/nova" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + testlib "github.com/cobaltcore-dev/cortex/pkg/testing" + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +// newFailoverReservationWithGroup creates a failover reservation with a specific resource group. +func newFailoverReservationWithGroup(name, targetHost, resourceGroup string) *v1alpha1.Reservation { + return &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeFailover, + TargetHost: targetHost, + Resources: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceCPU: *resource.NewQuantity(4, resource.DecimalSI), + hv1.ResourceMemory: *resource.NewQuantity(8192*1_000_000, resource.DecimalSI), + }, + FailoverReservation: &v1alpha1.FailoverReservationSpec{ + ResourceGroup: resourceGroup, + }, + }, + Status: v1alpha1.ReservationStatus{ + Conditions: []metav1.Condition{ + { + Type: v1alpha1.ReservationConditionReady, + Status: metav1.ConditionTrue, + Reason: "ReservationActive", + }, + }, + Host: targetHost, + FailoverReservation: &v1alpha1.FailoverReservationStatus{ + Allocations: map[string]string{"some-vm": "some-host"}, + }, + }, + } +} + +func newFailoverReservationRequest(resourceGroup string, hosts []string) api.ExternalSchedulerRequest { + hostList := make([]api.ExternalSchedulerHost, len(hosts)) + for i, h := range hosts { + hostList[i] = api.ExternalSchedulerHost{ComputeHost: h} + } + + spec := api.NovaSpec{ + ProjectID: "project-A", + InstanceUUID: "test-instance", + NumInstances: 1, + SchedulerHints: map[string]any{ + "_nova_check_type": string(api.ReserveForFailoverIntent), + api.HintKeyResourceGroup: resourceGroup, + }, + Flavor: api.NovaObject[api.NovaFlavor]{ + Data: api.NovaFlavor{ + Name: "m1.large", + VCPUs: 4, + MemoryMB: 8192, + ExtraSpecs: map[string]string{ + "capabilities:hypervisor_type": "qemu", + }, + }, + }, + } + + weights := make(map[string]float64) + for _, h := range hosts { + weights[h] = 1.0 + } + + return api.ExternalSchedulerRequest{ + Spec: api.NovaObject[api.NovaSpec]{Data: spec}, + Hosts: hostList, + Weights: weights, + } +} + +func approxEqual(a, b, epsilon float64) bool { + return math.Abs(a-b) < epsilon 
+} + +func TestKVMFailoverReservationConsolidationStep_Run(t *testing.T) { + scheme := buildTestScheme(t) + + tests := []struct { + name string + reservations []*v1alpha1.Reservation + request api.ExternalSchedulerRequest + opts KVMFailoverReservationConsolidationOpts + expectedWeights map[string]float64 + }{ + { + name: "consolidation: prefer host with existing failover reservations", + reservations: []*v1alpha1.Reservation{ + // host1 has 3 reservations (different groups) + newFailoverReservationWithGroup("res-1", "host1", "group-A"), + newFailoverReservationWithGroup("res-2", "host1", "group-B"), + newFailoverReservationWithGroup("res-3", "host1", "group-C"), + // host2 has 1 reservation + newFailoverReservationWithGroup("res-4", "host2", "group-B"), + }, + // Request for group-D - no same-group on any host + request: newFailoverReservationRequest("group-D", []string{"host1", "host2", "host3"}), + opts: KVMFailoverReservationConsolidationOpts{TotalCountWeight: testlib.Ptr(1.0), SameSpecPenalty: testlib.Ptr(0.1)}, + // T=4, host1: (1/4)*3=0.75, host2: (1/4)*1=0.25, host3: 0 + expectedWeights: map[string]float64{"host1": 0.75, "host2": 0.25, "host3": 0}, + }, + { + name: "same-group penalty: prefer host with fewer same-group reservations", + reservations: []*v1alpha1.Reservation{ + // host1 has 5 reservations, 0 same-group (group-A) + newFailoverReservationWithGroup("res-1", "host1", "group-B"), + newFailoverReservationWithGroup("res-2", "host1", "group-B"), + newFailoverReservationWithGroup("res-3", "host1", "group-C"), + newFailoverReservationWithGroup("res-4", "host1", "group-C"), + newFailoverReservationWithGroup("res-5", "host1", "group-D"), + // host2 has 5 reservations, 3 same-group (group-A) + newFailoverReservationWithGroup("res-6", "host2", "group-A"), + newFailoverReservationWithGroup("res-7", "host2", "group-A"), + newFailoverReservationWithGroup("res-8", "host2", "group-A"), + newFailoverReservationWithGroup("res-9", "host2", "group-C"), + newFailoverReservationWithGroup("res-10", "host2", "group-D"), + }, + request: newFailoverReservationRequest("group-A", []string{"host1", "host2", "host3"}), + opts: KVMFailoverReservationConsolidationOpts{TotalCountWeight: testlib.Ptr(1.0), SameSpecPenalty: testlib.Ptr(0.1)}, + // T=10 + // host1: (1/10)*5 - (0.1/10)*0 = 0.5 + // host2: (1/10)*5 - (0.1/10)*3 = 0.5 - 0.03 = 0.47 + // host3: 0 + expectedWeights: map[string]float64{"host1": 0.5, "host2": 0.47, "host3": 0}, + }, + { + name: "consolidation dominates: host with reservations preferred over empty host even with same-group", + reservations: []*v1alpha1.Reservation{ + // host2 has 5 reservations, 3 same-group (group-A) + newFailoverReservationWithGroup("res-1", "host2", "group-A"), + newFailoverReservationWithGroup("res-2", "host2", "group-A"), + newFailoverReservationWithGroup("res-3", "host2", "group-A"), + newFailoverReservationWithGroup("res-4", "host2", "group-C"), + newFailoverReservationWithGroup("res-5", "host2", "group-D"), + }, + request: newFailoverReservationRequest("group-A", []string{"host2", "host3"}), + opts: KVMFailoverReservationConsolidationOpts{TotalCountWeight: testlib.Ptr(1.0), SameSpecPenalty: testlib.Ptr(0.1)}, + // T=5 + // host2: (1/5)*5 - (0.1/5)*3 = 1.0 - 0.06 = 0.94 + // host3: 0 + expectedWeights: map[string]float64{"host2": 0.94, "host3": 0}, + }, + { + name: "no reservations: all hosts get default weight (no effect)", + reservations: []*v1alpha1.Reservation{}, + request: newFailoverReservationRequest("group-A", []string{"host1", "host2"}), + opts: 
KVMFailoverReservationConsolidationOpts{TotalCountWeight: testlib.Ptr(1.0), SameSpecPenalty: testlib.Ptr(0.1)}, + expectedWeights: map[string]float64{"host1": 0, "host2": 0}, + }, + { + name: "non-failover request: weigher has no effect", + reservations: []*v1alpha1.Reservation{ + newFailoverReservationWithGroup("res-1", "host1", "group-A"), + }, + // Use a non-failover request (evacuation) + request: newNovaRequest("instance-123", true, []string{"host1", "host2"}), + opts: KVMFailoverReservationConsolidationOpts{TotalCountWeight: testlib.Ptr(1.0), SameSpecPenalty: testlib.Ptr(0.1)}, + expectedWeights: map[string]float64{"host1": 0, "host2": 0}, + }, + { + name: "non-failover request without hints: weigher has no effect", + reservations: []*v1alpha1.Reservation{ + newFailoverReservationWithGroup("res-1", "host1", "group-A"), + }, + // Use a non-failover request (no hints = create intent) + request: newNovaRequest("instance-123", false, []string{"host1", "host2"}), + opts: KVMFailoverReservationConsolidationOpts{TotalCountWeight: testlib.Ptr(1.0), SameSpecPenalty: testlib.Ptr(0.1)}, + expectedWeights: map[string]float64{"host1": 0, "host2": 0}, + }, + { + name: "default options work correctly", + reservations: []*v1alpha1.Reservation{ + newFailoverReservationWithGroup("res-1", "host1", "group-B"), + newFailoverReservationWithGroup("res-2", "host1", "group-A"), // same group + newFailoverReservationWithGroup("res-3", "host2", "group-B"), + }, + request: newFailoverReservationRequest("group-A", []string{"host1", "host2", "host3"}), + opts: KVMFailoverReservationConsolidationOpts{}, // nil = use defaults + // Defaults: TotalCountWeight=1.0, SameSpecPenalty=0.1, T=3 + // host1: (1/3)*2 - (0.1/3)*1 ≈ 0.6667 - 0.0333 = 0.6333 + // host2: (1/3)*1 - (0.1/3)*0 ≈ 0.3333 + // host3: 0 + expectedWeights: map[string]float64{"host1": 2.0/3.0 - 0.1/3.0, "host2": 1.0 / 3.0, "host3": 0}, + }, + { + name: "committed resource reservations are ignored", + reservations: []*v1alpha1.Reservation{ + newFailoverReservationWithGroup("res-1", "host1", "group-A"), + newCommittedReservation("committed-1", "host2"), + }, + request: newFailoverReservationRequest("group-A", []string{"host1", "host2", "host3"}), + opts: KVMFailoverReservationConsolidationOpts{TotalCountWeight: testlib.Ptr(1.0), SameSpecPenalty: testlib.Ptr(0.1)}, + // T=1 (only 1 failover reservation), committed reservation ignored + // host1: (1/1)*1 - (0.1/1)*1 = 0.9 + // host2: 0 (committed reservation not counted) + // host3: 0 + expectedWeights: map[string]float64{"host1": 0.9, "host2": 0, "host3": 0}, + }, + { + name: "failed reservations are ignored", + reservations: []*v1alpha1.Reservation{ + newFailoverReservationWithGroup("res-1", "host1", "group-A"), + newFailoverReservation("failed-res", "host2", true, map[string]string{"vm-1": "h-1"}), + }, + request: newFailoverReservationRequest("group-A", []string{"host1", "host2"}), + opts: KVMFailoverReservationConsolidationOpts{TotalCountWeight: testlib.Ptr(1.0), SameSpecPenalty: testlib.Ptr(0.1)}, + // T=1 (failed reservation ignored) + // host1: (1/1)*1 - (0.1/1)*1 = 0.9 + // host2: 0 + expectedWeights: map[string]float64{"host1": 0.9, "host2": 0}, + }, + { + name: "custom weights adjust scoring", + reservations: []*v1alpha1.Reservation{ + newFailoverReservationWithGroup("res-1", "host1", "group-A"), + newFailoverReservationWithGroup("res-2", "host1", "group-A"), + newFailoverReservationWithGroup("res-3", "host2", "group-B"), + }, + request: newFailoverReservationRequest("group-A", []string{"host1", 
"host2"}), + opts: KVMFailoverReservationConsolidationOpts{TotalCountWeight: testlib.Ptr(2.0), SameSpecPenalty: testlib.Ptr(0.5)}, + // T=3, W=2.0, P=0.5 + // host1: (2/3)*2 - (0.5/3)*2 = 1.3333 - 0.3333 = 1.0 + // host2: (2/3)*1 - (0.5/3)*0 = 0.6667 + expectedWeights: map[string]float64{"host1": 1.0, "host2": 2.0 / 3.0}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + objects := make([]client.Object, 0, len(tt.reservations)) + for _, r := range tt.reservations { + objects = append(objects, r) + } + + step := &KVMFailoverReservationConsolidationStep{} + step.Client = fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build() + step.Options = tt.opts + + result, err := step.Run(slog.Default(), tt.request) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + for host, expectedWeight := range tt.expectedWeights { + actualWeight, ok := result.Activations[host] + if !ok { + t.Errorf("expected host %s to be in activations", host) + continue + } + if !approxEqual(actualWeight, expectedWeight, 0.0001) { + t.Errorf("host %s: expected weight %v, got %v", host, expectedWeight, actualWeight) + } + } + }) + } +} + +func TestKVMFailoverReservationConsolidationOpts_Defaults(t *testing.T) { + opts := KVMFailoverReservationConsolidationOpts{} + if opts.GetTotalCountWeight() != 1.0 { + t.Errorf("expected default TotalCountWeight 1.0, got %v", opts.GetTotalCountWeight()) + } + if opts.GetSameSpecPenalty() != 0.1 { + t.Errorf("expected default SameSpecPenalty 0.1, got %v", opts.GetSameSpecPenalty()) + } +} + +func TestKVMFailoverReservationConsolidationOpts_Validate(t *testing.T) { + opts := KVMFailoverReservationConsolidationOpts{ + TotalCountWeight: testlib.Ptr(2.0), + SameSpecPenalty: testlib.Ptr(0.5), + } + if err := opts.Validate(); err != nil { + t.Errorf("expected no error, got %v", err) + } +} diff --git a/internal/scheduling/reservations/failover/helpers.go b/internal/scheduling/reservations/failover/helpers.go index 3c1598316..48446e0c0 100644 --- a/internal/scheduling/reservations/failover/helpers.go +++ b/internal/scheduling/reservations/failover/helpers.go @@ -178,7 +178,7 @@ func newFailoverReservation( ctx context.Context, vm VM, hypervisor, creator string, - resolved resolvedReservationSpec, + resSpec resolvedReservationSpec, ) *v1alpha1.Reservation { logger := LoggerFromContext(ctx) diff --git a/internal/scheduling/reservations/failover/reservation_scheduling.go b/internal/scheduling/reservations/failover/reservation_scheduling.go index 8171ae3c8..f482f3393 100644 --- a/internal/scheduling/reservations/failover/reservation_scheduling.go +++ b/internal/scheduling/reservations/failover/reservation_scheduling.go @@ -30,7 +30,7 @@ const ( PipelineAcknowledgeFailoverReservation = "kvm-acknowledge-failover-reservation" ) -func (c *FailoverReservationController) queryHypervisorsFromScheduler(ctx context.Context, vm VM, allHypervisors []string, pipeline string, resolved resolvedReservationSpec) ([]string, error) { +func (c *FailoverReservationController) queryHypervisorsFromScheduler(ctx context.Context, vm VM, allHypervisors []string, pipeline string, resSpec resolvedReservationSpec) ([]string, error) { logger := LoggerFromContext(ctx) // Build list of eligible hypervisors (excluding VM's current hypervisor) @@ -79,7 +79,10 @@ func (c *FailoverReservationController) queryHypervisorsFromScheduler(ctx contex IgnoreHosts: ignoreHypervisors, Pipeline: pipeline, AvailabilityZone: vm.AvailabilityZone, - SchedulerHints: 
map[string]any{"_nova_check_type": string(api.ReserveForFailoverIntent)}, + SchedulerHints: map[string]any{ + "_nova_check_type": string(api.ReserveForFailoverIntent), + api.HintKeyResourceGroup: resSpec.ResourceGroup(vm.FlavorName), + }, } logger.V(1).Info("scheduling failover reservation", @@ -113,7 +116,7 @@ func (c *FailoverReservationController) tryReuseExistingReservation( vm VM, failoverReservations []v1alpha1.Reservation, allHypervisors []string, - resolved resolvedReservationSpec, + resSpec resolvedReservationSpec, ) *v1alpha1.Reservation { logger := LoggerFromContext(ctx) @@ -254,7 +257,7 @@ func (c *FailoverReservationController) scheduleAndBuildNewFailoverReservation( allHypervisors []string, failoverReservations []v1alpha1.Reservation, excludeHypervisors map[string]bool, - resolved resolvedReservationSpec, + resSpec resolvedReservationSpec, ) (*v1alpha1.Reservation, error) { logger := LoggerFromContext(ctx) diff --git a/internal/scheduling/reservations/failover/reservation_scheduling_test.go b/internal/scheduling/reservations/failover/reservation_scheduling_test.go index 7ea8bcb51..4ce4e0016 100644 --- a/internal/scheduling/reservations/failover/reservation_scheduling_test.go +++ b/internal/scheduling/reservations/failover/reservation_scheduling_test.go @@ -292,10 +292,10 @@ func TestResolveVMForSchedulingAndNewFailoverReservation(t *testing.T) { useFlavorGroupResources: false, flavorGroups: flavorGroups, wantFlavorName: "hana_c60_m960", // VM's own flavor name - wantFlavorGroupName: "", // no flavor group (disabled) - wantResourceGroup: "hana_c60_m960", // ResourceGroup = fallback to flavor name - wantMemoryMB: 983040, // VM's own memory - wantVCPUs: 60, // VM's own vcpus + wantFlavorGroupName: "", // no flavor group (disabled) + wantResourceGroup: "hana_c60_m960", // ResourceGroup = fallback to flavor name + wantMemoryMB: 983040, // VM's own memory + wantVCPUs: 60, // VM's own vcpus }, { name: "falls back to VM resources when flavor not in any group", @@ -312,8 +312,8 @@ func TestResolveVMForSchedulingAndNewFailoverReservation(t *testing.T) { useFlavorGroupResources: true, flavorGroups: flavorGroups, wantFlavorName: "unknown_flavor", // VM's own flavor name (fallback) - wantFlavorGroupName: "", // no flavor group (not found) - wantResourceGroup: "unknown_flavor", // ResourceGroup = fallback to flavor name + wantFlavorGroupName: "", // no flavor group (not found) + wantResourceGroup: "unknown_flavor", // ResourceGroup = fallback to flavor name wantMemoryMB: 16384, // VM's own memory (fallback) wantVCPUs: 8, // VM's own vcpus (fallback) }, @@ -331,11 +331,11 @@ func TestResolveVMForSchedulingAndNewFailoverReservation(t *testing.T) { }, useFlavorGroupResources: true, flavorGroups: nil, // nil flavor groups - wantFlavorName: "hana_c60_m960", // VM's own flavor name (fallback) - wantFlavorGroupName: "", // no flavor group (nil groups) - wantResourceGroup: "hana_c60_m960", // ResourceGroup = fallback to flavor name - wantMemoryMB: 983040, // VM's own memory (fallback) - wantVCPUs: 60, // VM's own vcpus (fallback) + wantFlavorName: "hana_c60_m960", // VM's own flavor name (fallback) + wantFlavorGroupName: "", // no flavor group (nil groups) + wantResourceGroup: "hana_c60_m960", // ResourceGroup = fallback to flavor name + wantMemoryMB: 983040, // VM's own memory (fallback) + wantVCPUs: 60, // VM's own vcpus (fallback) }, } From 04d26463cbf28897a0b52193084294cfcd88180f Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Fri, 24 Apr 2026 15:57:29 +0000 Subject: [PATCH 5/8] fix: 
address PR review feedback for flavor group support - Hoist resolveVMSpecForScheduling above inner loop (was redundantly called N times per VM instead of once) - Use hv1.ResourceMemory/hv1.ResourceCPU constants instead of string literals for consistency with test code - Keep binary MiB convention (1024*1024) for memory byte conversion, matching commitments/state.go and vm_source.go (the filter's /1_000_000 is only an approximation for slot counting, not a byte storage convention) - Demote flavor group 'not found' log from Info to V(1) to avoid flooding logs for non-grouped flavors - Add validation to consolidation weigher: enforce totalCountWeight >= 0, sameSpecPenalty >= 0, sameSpecPenalty < totalCountWeight - Add kubectl printcolumn for failover ResourceGroup (priority=1) --- api/v1alpha1/reservation_types.go | 1 + .../files/crds/cortex.cloud_reservations.yaml | 4 ++++ .../kvm_failover_reservation_consolidation.go | 12 ++++++++++++ .../scheduling/reservations/failover/controller.go | 6 +++--- .../scheduling/reservations/failover/helpers.go | 8 ++++---- .../failover/reservation_scheduling_test.go | 14 ++++++++------ 6 files changed, 32 insertions(+), 13 deletions(-) diff --git a/api/v1alpha1/reservation_types.go b/api/v1alpha1/reservation_types.go index 131c0788b..21c96efad 100644 --- a/api/v1alpha1/reservation_types.go +++ b/api/v1alpha1/reservation_types.go @@ -213,6 +213,7 @@ type ReservationStatus struct { // +kubebuilder:printcolumn:name="Host",type="string",JSONPath=".status.host" // +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" // +kubebuilder:printcolumn:name="ResourceGroup",type="string",JSONPath=".spec.committedResourceReservation.resourceGroup" +// +kubebuilder:printcolumn:name="HA ResourceGroup",type="string",JSONPath=".spec.failoverReservation.resourceGroup",priority=1 // +kubebuilder:printcolumn:name="Project",type="string",JSONPath=".spec.committedResourceReservation.projectID" // +kubebuilder:printcolumn:name="AZ",type="string",JSONPath=".spec.availabilityZone" // +kubebuilder:printcolumn:name="StartTime",type="string",JSONPath=".spec.startTime",priority=1 diff --git a/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml b/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml index 8ab0fade9..686aa60fe 100644 --- a/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml +++ b/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml @@ -27,6 +27,10 @@ spec: - jsonPath: .spec.committedResourceReservation.resourceGroup name: ResourceGroup type: string + - jsonPath: .spec.failoverReservation.resourceGroup + name: HA ResourceGroup + priority: 1 + type: string - jsonPath: .spec.committedResourceReservation.projectID name: Project type: string diff --git a/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation.go b/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation.go index 6bedb7d1c..377cae25d 100644 --- a/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation.go +++ b/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation.go @@ -5,6 +5,7 @@ package weighers import ( "context" + "errors" "log/slog" api "github.com/cobaltcore-dev/cortex/api/external/nova" @@ -26,6 +27,17 @@ type KVMFailoverReservationConsolidationOpts struct { } func (o KVMFailoverReservationConsolidationOpts) Validate() error { + w := o.GetTotalCountWeight() + p := o.GetSameSpecPenalty() + if w < 0 { + return 
errors.New("totalCountWeight must be non-negative") + } + if p < 0 { + return errors.New("sameSpecPenalty must be non-negative") + } + if w > 0 && p >= w { + return errors.New("sameSpecPenalty must be less than totalCountWeight") + } return nil } diff --git a/internal/scheduling/reservations/failover/controller.go b/internal/scheduling/reservations/failover/controller.go index ebd709b27..c66295293 100644 --- a/internal/scheduling/reservations/failover/controller.go +++ b/internal/scheduling/reservations/failover/controller.go @@ -620,10 +620,10 @@ func (c *FailoverReservationController) reconcileCreateAndAssignReservations( vmLogger := LoggerFromContext(vmCtx).WithValues("vmUUID", need.VM.UUID) vmLogger.Info("processing VM for failover reservation") - for i := range need.Count { - // Resolve VM resources once per VM (may use LargestFlavor from flavor group) - resSpec := resolveVMSpecForScheduling(vmCtx, need.VM, c.Config.UseFlavorGroupResources, flavorGroups) + // Resolve VM resources once per VM (may use LargestFlavor from flavor group) + resSpec := resolveVMSpecForScheduling(vmCtx, need.VM, c.Config.UseFlavorGroupResources, flavorGroups) + for i := range need.Count { reusedRes := c.tryReuseExistingReservation(vmCtx, need.VM, failoverReservations, allHypervisors, resSpec) if reusedRes != nil { diff --git a/internal/scheduling/reservations/failover/helpers.go b/internal/scheduling/reservations/failover/helpers.go index 48446e0c0..8623f117f 100644 --- a/internal/scheduling/reservations/failover/helpers.go +++ b/internal/scheduling/reservations/failover/helpers.go @@ -38,8 +38,8 @@ func (r resolvedReservationSpec) ResourceGroup(fallback string) string { // (e.g., nova filter_has_enough_capacity) uses "cpu". func (r resolvedReservationSpec) HypervisorResources() map[hv1.ResourceName]resource.Quantity { return map[hv1.ResourceName]resource.Quantity{ - "memory": *resource.NewQuantity(int64(r.MemoryMB)*1024*1024, resource.BinarySI), //nolint:gosec // flavor memory from specs, realistically bounded - "cpu": *resource.NewQuantity(int64(r.VCPUs), resource.DecimalSI), //nolint:gosec // flavor vcpus from specs, realistically bounded + hv1.ResourceMemory: *resource.NewQuantity(int64(r.MemoryMB)*1024*1024, resource.BinarySI), //nolint:gosec // flavor memory (MiB) from specs, realistically bounded; use binary to match commitments/state.go and vm_source.go + hv1.ResourceCPU: *resource.NewQuantity(int64(r.VCPUs), resource.DecimalSI), //nolint:gosec // flavor vcpus from specs, realistically bounded } } @@ -73,7 +73,7 @@ func resolveVMSpecForScheduling( VCPUs: largest.VCPUs, } } - logger.Info("flavor group lookup failed, falling back to VM resources", + logger.V(1).Info("flavor group lookup failed, falling back to VM resources", "vmFlavor", vm.FlavorName, "error", err) } @@ -81,7 +81,7 @@ func resolveVMSpecForScheduling( // Fallback: use VM's own resources var memoryMB, vcpus uint64 if memory, ok := vm.Resources["memory"]; ok { - memoryMB = uint64(memory.Value() / (1024 * 1024)) //nolint:gosec // memory values won't overflow + memoryMB = uint64(memory.Value() / (1024 * 1024)) //nolint:gosec // memory values won't overflow; binary MiB matches commitments/state.go and vm_source.go } if v, ok := vm.Resources["vcpus"]; ok { vcpus = uint64(v.Value()) //nolint:gosec // vcpus values won't overflow diff --git a/internal/scheduling/reservations/failover/reservation_scheduling_test.go b/internal/scheduling/reservations/failover/reservation_scheduling_test.go index 4ce4e0016..fa987d34b 100644 --- 
a/internal/scheduling/reservations/failover/reservation_scheduling_test.go
+++ b/internal/scheduling/reservations/failover/reservation_scheduling_test.go
@@ -170,8 +170,10 @@ func TestBuildNewFailoverReservation(t *testing.T) {
 			t.Errorf("allocated host = %v, want %v", allocatedHost, tt.vm.CurrentHypervisor)
 		}
 
-		// Verify resources are copied from VM
-		// Note: VM uses "vcpus" but reservation uses "cpu" as the canonical key
+		// Verify resources are derived from VM
+		// Note: VM uses "vcpus" but reservation uses "cpu" as the canonical key.
+		// Memory uses binary MiB (bytes / 1024*1024 → MiB → MiB * 1024*1024), matching
+		// commitments/state.go and vm_source.go conventions.
 		if tt.vm.Resources != nil {
 			if memory, ok := tt.vm.Resources["memory"]; ok {
 				if resMemory, ok := result.Spec.Resources[hv1.ResourceMemory]; !ok {
@@ -294,7 +296,7 @@ func TestResolveVMForSchedulingAndNewFailoverReservation(t *testing.T) {
 			wantFlavorName:      "hana_c60_m960", // VM's own flavor name
 			wantFlavorGroupName: "",              // no flavor group (disabled)
 			wantResourceGroup:   "hana_c60_m960", // ResourceGroup = fallback to flavor name
-			wantMemoryMB:        983040,          // VM's own memory
+			wantMemoryMB:        983040,          // VM's own memory (MiB, binary)
 			wantVCPUs:           60,              // VM's own vcpus
 		},
 		{
@@ -314,7 +316,7 @@ func TestResolveVMForSchedulingAndNewFailoverReservation(t *testing.T) {
 			wantFlavorName:      "unknown_flavor", // VM's own flavor name (fallback)
 			wantFlavorGroupName: "",               // no flavor group (not found)
 			wantResourceGroup:   "unknown_flavor", // ResourceGroup = fallback to flavor name
-			wantMemoryMB:        16384,            // VM's own memory (fallback)
+			wantMemoryMB:        16384,            // VM's own memory (MiB, binary)
 			wantVCPUs:           8,                // VM's own vcpus (fallback)
 		},
 		{
@@ -334,7 +336,7 @@ func TestResolveVMForSchedulingAndNewFailoverReservation(t *testing.T) {
 			wantFlavorName:      "hana_c60_m960", // VM's own flavor name (fallback)
 			wantFlavorGroupName: "",              // no flavor group (nil groups)
 			wantResourceGroup:   "hana_c60_m960", // ResourceGroup = fallback to flavor name
-			wantMemoryMB:        983040,          // VM's own memory (fallback)
+			wantMemoryMB:        983040,          // VM's own memory (MiB, binary)
 			wantVCPUs:           60,              // VM's own vcpus
 		},
 	}
@@ -368,7 +370,7 @@ func TestResolveVMForSchedulingAndNewFailoverReservation(t *testing.T) {
 		if !ok {
 			t.Fatal("reservation missing memory resource")
 		}
-		wantMemoryBytes := int64(tt.wantMemoryMB) * 1024 * 1024 //nolint:gosec // test values won't overflow
+		wantMemoryBytes := int64(tt.wantMemoryMB) * 1024 * 1024 //nolint:gosec // test values won't overflow; binary MiB matches OpenStack convention
 		if resMemory.Value() != wantMemoryBytes {
 			t.Errorf("reservation memory = %d bytes, want %d bytes", resMemory.Value(), wantMemoryBytes)
 		}

From 6a919fff787018a9254a4e366623e8610e372e21 Mon Sep 17 00:00:00 2001
From: Malte Viering
Date: Mon, 27 Apr 2026 09:37:26 +0200
Subject: [PATCH 6/8] fix: require zero sameSpecPenalty when totalCountWeight is zero

---
 .../kvm_failover_reservation_consolidation.go |  3 +
 ...failover_reservation_consolidation_test.go | 82 +++++++++++++++++--
 2 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation.go b/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation.go
index 377cae25d..727afce33 100644
--- a/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation.go
+++ b/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation.go
@@ -35,6 +35,9 @@ func (o KVMFailoverReservationConsolidationOpts) Validate() error {
 	if p < 0 {
 		return 
errors.New("sameSpecPenalty must be non-negative") } + if w == 0 && p > 0 { + return errors.New("sameSpecPenalty must be zero when totalCountWeight is zero") + } if w > 0 && p >= w { return errors.New("sameSpecPenalty must be less than totalCountWeight") } diff --git a/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation_test.go b/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation_test.go index e477858f2..62d69d319 100644 --- a/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation_test.go +++ b/internal/scheduling/nova/plugins/weighers/kvm_failover_reservation_consolidation_test.go @@ -286,11 +286,83 @@ func TestKVMFailoverReservationConsolidationOpts_Defaults(t *testing.T) { } func TestKVMFailoverReservationConsolidationOpts_Validate(t *testing.T) { - opts := KVMFailoverReservationConsolidationOpts{ - TotalCountWeight: testlib.Ptr(2.0), - SameSpecPenalty: testlib.Ptr(0.5), + tests := []struct { + name string + opts KVMFailoverReservationConsolidationOpts + wantErr string + }{ + { + name: "valid: both set, p < w", + opts: KVMFailoverReservationConsolidationOpts{ + TotalCountWeight: testlib.Ptr(2.0), + SameSpecPenalty: testlib.Ptr(0.5), + }, + }, + { + name: "valid: defaults (nil)", + opts: KVMFailoverReservationConsolidationOpts{}, + }, + { + name: "valid: both zero", + opts: KVMFailoverReservationConsolidationOpts{ + TotalCountWeight: testlib.Ptr(0.0), + SameSpecPenalty: testlib.Ptr(0.0), + }, + }, + { + name: "invalid: negative totalCountWeight", + opts: KVMFailoverReservationConsolidationOpts{ + TotalCountWeight: testlib.Ptr(-1.0), + }, + wantErr: "totalCountWeight must be non-negative", + }, + { + name: "invalid: negative sameSpecPenalty", + opts: KVMFailoverReservationConsolidationOpts{ + SameSpecPenalty: testlib.Ptr(-0.1), + }, + wantErr: "sameSpecPenalty must be non-negative", + }, + { + name: "invalid: p >= w", + opts: KVMFailoverReservationConsolidationOpts{ + TotalCountWeight: testlib.Ptr(1.0), + SameSpecPenalty: testlib.Ptr(1.0), + }, + wantErr: "sameSpecPenalty must be less than totalCountWeight", + }, + { + name: "invalid: w=0 with p>0 (default penalty with zero weight)", + opts: KVMFailoverReservationConsolidationOpts{ + TotalCountWeight: testlib.Ptr(0.0), + // SameSpecPenalty defaults to 0.1 + }, + wantErr: "sameSpecPenalty must be zero when totalCountWeight is zero", + }, + { + name: "invalid: w=0 with explicit p>0", + opts: KVMFailoverReservationConsolidationOpts{ + TotalCountWeight: testlib.Ptr(0.0), + SameSpecPenalty: testlib.Ptr(0.5), + }, + wantErr: "sameSpecPenalty must be zero when totalCountWeight is zero", + }, } - if err := opts.Validate(); err != nil { - t.Errorf("expected no error, got %v", err) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.opts.Validate() + if tt.wantErr == "" { + if err != nil { + t.Errorf("expected no error, got %v", err) + } + } else { + if err == nil { + t.Errorf("expected error %q, got nil", tt.wantErr) + } else if err.Error() != tt.wantErr { + t.Errorf("expected error %q, got %q", tt.wantErr, err.Error()) + } + } + }) } } From 0fbe3025997b2e865d4e39f8623891041c609866 Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Mon, 27 Apr 2026 09:39:53 +0200 Subject: [PATCH 7/8] enable useFlavorGroupResources --- helm/bundles/cortex-nova/values.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index c40849739..470763314 100644 --- 
a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -203,6 +203,8 @@ cortex-scheduling-controllers: revalidationInterval: 30m # Prevents creating multiple new reservations on the same hypervisor per cycle limitOneNewReservationPerHypervisor: false + # Size failover reservations based on LargestFlavor in the flavor group + useFlavorGroupResources: true cortex-knowledge-controllers: <<: *cortex From 7405b73e5d9f91addd61c513df53d5b49bcec4fa Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Mon, 27 Apr 2026 14:59:09 +0200 Subject: [PATCH 8/8] set useFlavorGroupResources to false --- helm/bundles/cortex-nova/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index 470763314..65ad879dd 100644 --- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -204,7 +204,7 @@ cortex-scheduling-controllers: # Prevents creating multiple new reservations on the same hypervisor per cycle limitOneNewReservationPerHypervisor: false # Size failover reservations based on LargestFlavor in the flavor group - useFlavorGroupResources: true + useFlavorGroupResources: false cortex-knowledge-controllers: <<: *cortex
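
For readers outside the codebase, a minimal sketch of the sizing behavior these patches toggle: when useFlavorGroupResources is enabled, resolveVMSpecForScheduling sizes the reservation for the group's LargestFlavor so one reservation can absorb any flavor in the group, and on any lookup failure it falls back to the VM's own resources; the resource-group hint likewise falls back to the flavor name, as the test expectations above show. The types and helpers below are trimmed stand-ins, not the real signatures.

package main

import "fmt"

// Trimmed stand-ins for the compute flavor-group types.
type flavorSpec struct{ MemoryMB, VCPUs uint64 }

type flavorGroup struct {
	LargestFlavor flavorSpec
	Flavors       []string // flavor names in the group
}

// resolveSpec mirrors the fallback chain: the group's largest flavor when the
// VM's flavor belongs to a group, otherwise the VM's own memory and vcpus.
func resolveSpec(vmFlavor string, vmSpec flavorSpec, enabled bool, groups map[string]flavorGroup) (groupName string, spec flavorSpec) {
	if enabled {
		for name, g := range groups { // a nil map ranges zero times
			for _, f := range g.Flavors {
				if f == vmFlavor {
					return name, g.LargestFlavor
				}
			}
		}
	}
	return "", vmSpec // fallback: VM's own resources
}

// resourceGroup is the hint fallback implied by the tests: the group name
// when one was resolved, else the flavor name itself.
func resourceGroup(groupName, flavorName string) string {
	if groupName != "" {
		return groupName
	}
	return flavorName
}

func main() {
	groups := map[string]flavorGroup{
		"hana": {LargestFlavor: flavorSpec{MemoryMB: 983040, VCPUs: 60}, Flavors: []string{"hana_c60_m960"}},
	}
	g, spec := resolveSpec("hana_c60_m960", flavorSpec{MemoryMB: 16384, VCPUs: 8}, true, groups)
	fmt.Println(resourceGroup(g, "hana_c60_m960"), spec) // hana {983040 60}
}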
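
The binary-MiB point from PATCH 5/8 in isolation: dividing bytes by 1024*1024 and multiplying back round-trips exactly, while dividing by 1_000_000 only approximates, which is acceptable for slot counting but not as a byte storage convention. A sketch using apimachinery's resource.Quantity as the diffs do; the package layout and sample value are illustrative.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// A VM reporting 983040 MiB of memory, as in the hana_c60_m960 test case.
	mem := resource.NewQuantity(983040*1024*1024, resource.BinarySI)

	// Binary convention (commitments/state.go, vm_source.go): lossless round trip.
	memoryMB := mem.Value() / (1024 * 1024)
	fmt.Println(memoryMB, memoryMB*1024*1024 == mem.Value()) // 983040 true

	// Decimal division: a slot-counting approximation, not a storage convention.
	fmt.Println(mem.Value() / 1_000_000) // 1030792, which is not 983040
}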
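
And the full constraint set the weigher's Validate ends up with after PATCH 5/8 and 6/8, restated as a standalone sketch: both values non-negative, penalty strictly below a positive weight, and a zero weight forcing a zero penalty, which is exactly why a zero weight combined with the defaulted penalty is rejected. The 0.1 default for sameSpecPenalty comes from the test comment above; the totalCountWeight default used here is an assumption for illustration.

package main

import (
	"errors"
	"fmt"
)

type consolidationOpts struct {
	totalCountWeight *float64 // nil -> default (assumed 1.0 here)
	sameSpecPenalty  *float64 // nil -> default 0.1 (per the test comment)
}

func orDefault(p *float64, def float64) float64 {
	if p == nil {
		return def
	}
	return *p
}

func (o consolidationOpts) validate() error {
	w := orDefault(o.totalCountWeight, 1.0) // assumed default
	p := orDefault(o.sameSpecPenalty, 0.1)
	switch {
	case w < 0:
		return errors.New("totalCountWeight must be non-negative")
	case p < 0:
		return errors.New("sameSpecPenalty must be non-negative")
	case w == 0 && p > 0:
		return errors.New("sameSpecPenalty must be zero when totalCountWeight is zero")
	case w > 0 && p >= w:
		return errors.New("sameSpecPenalty must be less than totalCountWeight")
	}
	return nil
}

func main() {
	zero := 0.0
	fmt.Println(consolidationOpts{}.validate())                        // <nil>: defaults satisfy p < w
	fmt.Println(consolidationOpts{totalCountWeight: &zero}.validate()) // rejected: defaulted penalty with zero weight
}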