Skip to content
5 changes: 5 additions & 0 deletions api/external/nova/messages.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ const (
ReserveForFailoverIntent v1alpha1.SchedulingIntent = "reserve_for_failover"
// ReserveForCommittedResourceIntent indicates that the request is for CR reservation scheduling.
ReserveForCommittedResourceIntent v1alpha1.SchedulingIntent = "reserve_for_committed_resource"

// HintKeyResourceGroup is the scheduler hint key used to pass the resource group
// (e.g., flavor group name) for failover reservation scheduling.
// The weigher uses this to compare against existing reservations' ResourceGroup.
HintKeyResourceGroup = "_cortex_resource_group"
)

// GetIntent analyzes the request spec and determines the intent of the scheduling request.
Expand Down
7 changes: 7 additions & 0 deletions api/v1alpha1/reservation_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package v1alpha1

import (
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
Expand Down Expand Up @@ -212,6 +213,7 @@ type ReservationStatus struct {
// +kubebuilder:printcolumn:name="Host",type="string",JSONPath=".status.host"
// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status"
// +kubebuilder:printcolumn:name="ResourceGroup",type="string",JSONPath=".spec.committedResourceReservation.resourceGroup"
// +kubebuilder:printcolumn:name="HA ResourceGroup",type="string",JSONPath=".spec.failoverReservation.resourceGroup",priority=1
// +kubebuilder:printcolumn:name="Project",type="string",JSONPath=".spec.committedResourceReservation.projectID"
// +kubebuilder:printcolumn:name="AZ",type="string",JSONPath=".spec.availabilityZone"
// +kubebuilder:printcolumn:name="StartTime",type="string",JSONPath=".spec.startTime",priority=1
Expand Down Expand Up @@ -248,6 +250,11 @@ type ReservationList struct {
Items []Reservation `json:"items"`
}

// IsReady reports whether the reservation's status carries a Ready condition
// whose status is True. A missing Ready condition counts as not ready.
func (r *Reservation) IsReady() bool {
	conditions := r.Status.Conditions
	return meta.IsStatusConditionTrue(conditions, ReservationConditionReady)
}

// init registers the Reservation and ReservationList types with the
// package's SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		new(Reservation),
		new(ReservationList),
	)
}
2 changes: 2 additions & 0 deletions helm/bundles/cortex-nova/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ cortex-scheduling-controllers:
revalidationInterval: 30m
# Prevents creating multiple new reservations on the same hypervisor per cycle
limitOneNewReservationPerHypervisor: false
# Size failover reservations based on LargestFlavor in the flavor group
useFlavorGroupResources: false

cortex-knowledge-controllers:
<<: *cortex
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ spec:
- jsonPath: .spec.committedResourceReservation.resourceGroup
name: ResourceGroup
type: string
- jsonPath: .spec.failoverReservation.resourceGroup
name: HA ResourceGroup
priority: 1
type: string
- jsonPath: .spec.committedResourceReservation.projectID
name: Project
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ import (
"strconv"
"strings"

"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/cobaltcore-dev/cortex/api/v1alpha1"
Expand Down Expand Up @@ -177,8 +175,7 @@ func aggregateReservationsByHost(reservations []v1alpha1.Reservation) (
continue
}

readyCondition := meta.FindStatusCondition(reservation.Status.Conditions, v1alpha1.ReservationConditionReady)
if readyCondition == nil || readyCondition.Status != metav1.ConditionTrue {
if !reservation.IsReady() {
continue
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"github.com/cobaltcore-dev/cortex/api/v1alpha1"
"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/resource"
)

Expand Down Expand Up @@ -92,7 +91,7 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa
return nil, err
}
for _, reservation := range reservations.Items {
if !meta.IsStatusConditionTrue(reservation.Status.Conditions, v1alpha1.ReservationConditionReady) {
if !reservation.IsReady() {
continue // Only consider active reservations (Ready=True).
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
api "github.com/cobaltcore-dev/cortex/api/external/nova"
"github.com/cobaltcore-dev/cortex/api/v1alpha1"
"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
"k8s.io/apimachinery/pkg/api/meta"
)

// Options for the KVM failover evacuation weigher.
Expand Down Expand Up @@ -72,8 +71,7 @@ func (s *KVMFailoverEvacuationStep) Run(traceLog *slog.Logger, request api.Exter
failoverHosts := make(map[string]bool)
for _, reservation := range reservations.Items {
// Only consider active failover reservations (Ready condition is True)
readyCondition := meta.FindStatusCondition(reservation.Status.Conditions, v1alpha1.ReservationConditionReady)
if readyCondition == nil || readyCondition.Status != "True" {
if !reservation.IsReady() {
continue
}
if reservation.Spec.Type != v1alpha1.ReservationTypeFailover {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0

package weighers

import (
	"context"
	"errors"
	"log/slog"
	"math"

	api "github.com/cobaltcore-dev/cortex/api/external/nova"
	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
	"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
)

// KVMFailoverReservationConsolidationOpts holds the options for the KVM
// failover reservation consolidation weigher.
type KVMFailoverReservationConsolidationOpts struct {
	// Weight multiplier for the total failover reservation count per host (consolidation signal).
	// Higher values more aggressively pack failover reservations onto fewer hosts.
	// Default: 1.0
	TotalCountWeight *float64 `json:"totalCountWeight,omitempty"`
	// Penalty multiplier for same-spec reservation count per host (diversity signal).
	// Higher values more aggressively avoid clustering reservations of the same size on one host.
	// Must be less than TotalCountWeight so that consolidation stays the primary goal.
	// Default: 0.1
	SameSpecPenalty *float64 `json:"sameSpecPenalty,omitempty"`
}

// Validate checks the effective option values, i.e. the configured values
// with defaults applied for unset fields. It returns an error when
//   - either value is NaN or infinite,
//   - either value is negative,
//   - totalCountWeight is zero while sameSpecPenalty is positive, or
//   - sameSpecPenalty is not strictly less than a positive totalCountWeight.
//
// It returns nil otherwise.
func (o KVMFailoverReservationConsolidationOpts) Validate() error {
	w := o.GetTotalCountWeight()
	p := o.GetSameSpecPenalty()

	// Non-finite values must be rejected explicitly: every ordered comparison
	// against NaN is false, so NaN would slip through all range checks below,
	// and an infinite weight would pass the "penalty < weight" check.
	if math.IsNaN(w) || math.IsInf(w, 0) {
		return errors.New("totalCountWeight must be a finite number")
	}
	if math.IsNaN(p) || math.IsInf(p, 0) {
		return errors.New("sameSpecPenalty must be a finite number")
	}

	if w < 0 {
		return errors.New("totalCountWeight must be non-negative")
	}
	if p < 0 {
		return errors.New("sameSpecPenalty must be non-negative")
	}
	if w == 0 && p > 0 {
		return errors.New("sameSpecPenalty must be zero when totalCountWeight is zero")
	}
	if w > 0 && p >= w {
		return errors.New("sameSpecPenalty must be less than totalCountWeight")
	}
	return nil
}

// GetTotalCountWeight returns the configured TotalCountWeight, or the default
// of 1.0 when the field is unset.
func (o KVMFailoverReservationConsolidationOpts) GetTotalCountWeight() float64 {
	if o.TotalCountWeight == nil {
		return 1.0
	}
	return *o.TotalCountWeight
}

// GetSameSpecPenalty returns the configured SameSpecPenalty, or the default
// of 0.1 when the field is unset.
func (o KVMFailoverReservationConsolidationOpts) GetSameSpecPenalty() float64 {
	if o.SameSpecPenalty == nil {
		return 0.1
	}
	return *o.SameSpecPenalty
}

// KVMFailoverReservationConsolidationStep is the weigher used when placing
// failover reservations. Its primary goal is to pack failover reservations
// onto as few hosts as possible; as a secondary tiebreaker it favours hosts
// that hold fewer reservations of the request's own ResourceGroup.
//
// The request's ResourceGroup arrives through the scheduler hint
// "_cortex_resource_group" and is matched against
// Spec.FailoverReservation.ResourceGroup of every existing reservation, which
// groups reservations by flavor group (or by individual flavor name when no
// group exists).
//
// With T being the total number of counted reservations, each host scores:
//
//	score = (totalCountWeight / T) × hostCount - (sameSpecPenalty / T) × sameGroupCount
//
// Normalizing by T keeps the output bounded (~0 to 1) so it combines well
// with other weighers.
type KVMFailoverReservationConsolidationStep struct {
	lib.BaseWeigher[api.ExternalSchedulerRequest, KVMFailoverReservationConsolidationOpts]
}

// Run the weigher step.
// For reserve_for_failover requests, hosts are scored based on existing failover reservation density
// and same-spec diversity. For all other request types, this weigher has no effect.
func (s *KVMFailoverReservationConsolidationStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) {
result := s.IncludeAllHostsFromRequest(request)

intent, err := request.GetIntent()
if err != nil || intent != api.ReserveForFailoverIntent {
traceLog.Info("skipping failover reservation consolidation weigher for non-failover-reservation request")
return result, nil //nolint:nilerr // intentionally skip weigher on error
}
Comment thread
umswmayj marked this conversation as resolved.

// Extract the resource group from the scheduler hint.
// This identifies which "spec group" the incoming reservation belongs to.
// If the hint is missing, requestResourceGroup will be empty and the same-group penalty is skipped.
requestResourceGroup, _ := request.Spec.Data.GetSchedulerHintStr(api.HintKeyResourceGroup) //nolint:errcheck // missing hint is fine, same-group penalty is simply skipped

// Fetch all reservations.
var reservations v1alpha1.ReservationList
if err := s.Client.List(context.Background(), &reservations); err != nil {
return nil, err
}

// Count failover reservations per host, and same-group reservations per host.
totalPerHost := make(map[string]float64)
sameGroupPerHost := make(map[string]float64)
totalReservations := 0

for _, reservation := range reservations.Items {
// Only consider active failover reservations (Ready condition is True).
if !reservation.IsReady() {
continue
}
if reservation.Spec.Type != v1alpha1.ReservationTypeFailover {
continue
}

host := reservation.Status.Host
if host == "" {
continue
}

totalReservations++
totalPerHost[host]++

// Check if this reservation belongs to the same resource group as the request.
if requestResourceGroup != "" && reservation.Spec.FailoverReservation != nil &&
reservation.Spec.FailoverReservation.ResourceGroup == requestResourceGroup {
sameGroupPerHost[host]++
}
}

// If there are no failover reservations, the weigher has no information to act on.
if totalReservations == 0 {
traceLog.Info("no active failover reservations found, skipping consolidation weigher")
return result, nil
}

totalCountWeight := s.Options.GetTotalCountWeight()
sameSpecPenalty := s.Options.GetSameSpecPenalty()
t := float64(totalReservations)

for _, host := range request.Hosts {
hostTotal := totalPerHost[host.ComputeHost]
hostSameGroup := sameGroupPerHost[host.ComputeHost]

// Normalized score: bounded output for compatibility with other weighers.
score := (totalCountWeight/t)*hostTotal - (sameSpecPenalty/t)*hostSameGroup

result.Activations[host.ComputeHost] = score
traceLog.Info("calculated failover consolidation score for host",
"host", host.ComputeHost,
"totalOnHost", hostTotal,
"sameGroupOnHost", hostSameGroup,
"resourceGroup", requestResourceGroup,
"totalReservations", totalReservations,
"score", score)
}

return result, nil
}

// init registers the weigher in Index under the name
// "kvm_failover_reservation_consolidation".
func init() {
	Index["kvm_failover_reservation_consolidation"] = func() NovaWeigher {
		return new(KVMFailoverReservationConsolidationStep)
	}
}
Loading
Loading