diff --git a/api/v1alpha1/committed_resource_types.go b/api/v1alpha1/committed_resource_types.go index 5ed61a11a..a6f1bd217 100644 --- a/api/v1alpha1/committed_resource_types.go +++ b/api/v1alpha1/committed_resource_types.go @@ -90,6 +90,18 @@ type CommittedResourceSpec struct { // +kubebuilder:validation:Enum=planned;pending;guaranteed;confirmed;superseded;expired // +kubebuilder:validation:Required State CommitmentStatus `json:"state"` + + // AllowRejection controls what the CommittedResource controller does when placement fails + // for a guaranteed or confirmed commitment. + // true — controller may reject: on failure, child Reservations are rolled back and the CR + // is marked Rejected. Use this when the caller is making a first-time placement + // decision and a "no" answer is acceptable (e.g. the change-commitments API). + // false — controller must retry: on failure, existing child Reservations are kept and the + // CR is set to Reserving so the controller retries later. Use this when the caller + // is restoring already-committed state that Cortex must honour (e.g. the syncer). + // Only meaningful for state=guaranteed or state=confirmed; ignored for all other states. + // +kubebuilder:validation:Optional + AllowRejection bool `json:"allowRejection,omitempty"` } // CommittedResourceStatus defines the observed state of CommittedResource. @@ -131,6 +143,12 @@ type CommittedResourceStatus struct { Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` } +const ( + // CommittedResourceConditionReady indicates whether the CommittedResource has been + // successfully reconciled into active Reservation CRDs. 
+ CommittedResourceConditionReady = "Ready" +) + // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster diff --git a/cmd/manager/main.go b/cmd/manager/main.go index b74b21d1b..ba1cd52e8 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -548,6 +548,15 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "CommitmentReservation") os.Exit(1) } + + if err := (&commitments.CommittedResourceController{ + Client: multiclusterClient, + Scheme: mgr.GetScheme(), + Conf: commitmentsConfig, + }).SetupWithManager(mgr, multiclusterClient); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "CommittedResource") + os.Exit(1) + } } if slices.Contains(mainConfig.EnabledControllers, "datasource-controllers") { setupLog.Info("enabling controller", "controller", "datasource-controllers") diff --git a/docs/reservations/committed-resource-reservations.md b/docs/reservations/committed-resource-reservations.md index 52890bf75..95f8b8bd5 100644 --- a/docs/reservations/committed-resource-reservations.md +++ b/docs/reservations/committed-resource-reservations.md @@ -7,17 +7,20 @@ Cortex reserves hypervisor capacity for customers who pre-commit resources (comm - [Configuration and Observability](#configuration-and-observability) - [Lifecycle Management](#lifecycle-management) - [State (CRDs)](#state-crds) - - [CR Reservation Lifecycle](#cr-reservation-lifecycle) - - [VM Lifecycle](#vm-lifecycle) - - [Capacity Blocking](#capacity-blocking) + - [CR Commitment Lifecycle](#cr-commitment-lifecycle) + - [CommittedResource Controller](#committedresource-controller) + - [Reservation Lifecycle](#reservation-lifecycle) + - [VM Lifecycle](#vm-lifecycle) + - [Capacity Blocking](#capacity-blocking) + - [Reservation Controller](#reservation-controller) - [Change-Commitments API](#change-commitments-api) - [Syncer Task](#syncer-task) - - [Controller 
(Reconciliation)](#controller-reconciliation) - [Usage API](#usage-api) The CR reservation implementation is located in `internal/scheduling/reservations/commitments/`. Key components include: -- Controller logic (`controller.go`) -- API handlers in the `api/` subpackage (`change_commitments.go`, `report_capacity.go`, `report_usage.go`) +- `CommittedResource` controller (`committed_resource_controller.go`) — acceptance, rejection, child Reservation CRUD +- `Reservation` controller (`reservation_controller.go`) — placement, VM allocation verification +- API endpoints (`api_*.go`) - Capacity and usage calculation logic (`capacity.go`, `usage.go`) - Syncer for periodic state sync (`syncer.go`) @@ -35,47 +38,103 @@ The CR reservation implementation is located in `internal/scheduling/reservation ## Lifecycle Management -### State (CRDs) -Defined in `api/v1alpha1/reservation_types.go`, which contains definitions for CR reservations and failover reservations (see [./failover-reservations.md](./failover-reservations.md)). - -A reservation CRD represents a single reservation slot on a hypervisor, which holds multiple VMs. -A single CR entry typically refers to multiple reservation CRDs (slots). - - -### CR Reservation Lifecycle +The system is organized around two CRD types and two controllers. `CommittedResource` CRDs represent customer commitments; `Reservation` CRDs represent individual hypervisor capacity slots. Each has its own controller with a well-defined responsibility boundary. 
```mermaid flowchart LR subgraph State + CR[(CommittedResource CRDs)] Res[(Reservation CRDs)] end - + Syncer[Syncer Task] ChangeAPI[Change API] CapacityAPI[Capacity API] - Controller[Controller] + CRCtrl[CommittedResource Controller] + ResCtrl[Reservation Controller] UsageAPI[Usage API] Scheduler[Scheduler API] - - ChangeAPI -->|CRUD| Res - Syncer -->|CRUD| Res + + ChangeAPI -->|CRUD| CR + Syncer -->|CRUD| CR + UsageAPI -->|read| CR UsageAPI -->|read| Res CapacityAPI -->|read| Res CapacityAPI -->|capacity request| Scheduler - Res -->|watch| Controller - Controller -->|update spec/status| Res - Controller -->|reservation placement request| Scheduler + CR -->|watch| CRCtrl + CRCtrl -->|CRUD child Reservation slots| Res + CRCtrl -->|update status| CR + Res -->|watch| CRCtrl + Res -->|watch| ResCtrl + ResCtrl -->|placement request| Scheduler + ResCtrl -->|update status| Res +``` + +### State (CRDs) + +**`CommittedResource` CRD** (`committed_resource_types.go`) — primary source of truth for a commitment accepted by Cortex. One CRD per commitment UUID. Spec holds the commitment identity (project, flavor group, ...). Status holds the acceptance outcome (`Ready` condition with reason `Planned`/`Reserving`/`Rejected`) and the accepted amount. + +**`Reservation` CRD** (`reservation_types.go`) — a single reservation slot on a hypervisor, owned by a `CommittedResource`. One `CommittedResource` typically drives multiple `Reservation` CRDs (one per flavor-sized slot). See [./failover-reservations.md](./failover-reservations.md) for the failover reservation type. + +### CR Commitment Lifecycle + +The CR commitment lifecycle covers everything from a commitment being accepted by Limes through to Cortex confirming or rejecting it. The `CommittedResource` CRD is the entry point; the `CommittedResource` controller owns the acceptance decision. 
+ +**Limes state → Cortex action:** + +| Limes State | Meaning | Cortex action | +|---|---|---| +| `planned` | Future start, no guarantee yet | No Reservations — capacity not blocked | +| `pending` | Limes asking for a yes/no decision now | One-shot attempt — accept or reject; no retry | +| `guaranteed` / `confirmed` | Capacity must be honoured | Place Reservations and keep them in sync; see failure handling below | +| `superseded` / `expired` | Commitment no longer active | Remove all child Reservations | + +**CommittedResource status conditions (Cortex-side):** + +```mermaid +stateDiagram-v2 + direction LR + state "Planned (Ready=False)" as Planned + state "Reserving (Ready=False)" as Reserving + state "Active (Ready=True)" as Active + state "Rejected (Ready=False)" as Rejected + + [*] --> Planned : state=planned + [*] --> Reserving : state=pending / guaranteed / confirmed + Planned --> Reserving : state changes to pending/guaranteed/confirmed + Reserving --> Active : placement succeeded + Reserving --> Rejected : placement failed (pending, or AllowRejection=true) or invalid spec + Reserving --> Reserving : placement failed — retrying (AllowRejection=false) + Active --> Reserving : spec changed (e.g. resize) + Active --> [*] : state=superseded / expired + Rejected --> [*] : deleted + Planned --> [*] : deleted ``` -Reservations are managed through the Change API, Syncer Task, and Controller reconciliation. +#### CommittedResource Controller + +The controller's job is to keep child `Reservation` CRDs in sync with the desired state expressed in `Spec.Amount`. The key rules: + +- **`pending`**: Cortex is being asked for a yes/no decision. If placement fails for any reason, child Reservations are removed and the CR is marked Rejected. The caller (e.g. the change-commitments API) reads the outcome and reports back to Limes. No retry. + +- **`guaranteed` / `confirmed`**: Cortex is expected to honour the commitment.
The default is to keep retrying until placement succeeds (`Ready=False, Reason=Reserving`). Callers that can accept "no" as an answer (e.g. the change-commitments API on a resize request) set `Spec.AllowRejection=true`; the controller then rejects on failure instead of retrying. + +- **On rejection**: rolls back child Reservations to the last successfully placed quantity (`Status.AcceptedAmount`). For a CR that was never accepted, this means removing all child Reservations. + +The controller communicates with the Reservation controller only through CRDs — no direct calls. + +### Reservation Lifecycle | Component | Event | Timing | Action | |-----------|-------|--------|--------| -| **Change API / Syncer** | CR Create, Resize, Delete | Immediate/Hourly | Create/update/delete Reservation CRDs | -| **Controller** | Placement | On creation | Find host via scheduler API, set `TargetHost` | -| **Controller** | Optimize unused slots | >> minutes | Assign PAYG VMs or re-place reservations | +| **Reservation Controller** | `Reservation` created | Immediate (watch) | Find host via scheduler API, set `TargetHost` | +| **Scheduling Pipeline** | VM Create, Migrate, Resize | Immediate | Add VM to `Spec.Allocations` | +| **Reservation Controller** | Reservation CRD updated | `committedResourceRequeueIntervalGracePeriod` (default: 1 min) | Defer verification for new VMs still spawning; update `Status.Allocations` | +| **Reservation Controller** | Hypervisor CRD updated (VM appeared/disappeared) | Immediate (event-driven) | Verify allocations via Hypervisor CRD; remove gone VMs from `Spec.Allocations` | +| **Reservation Controller** | Periodic safety-net | `committedResourceRequeueIntervalActive` (default: 5 min) | Same as above; catches any missed events | +| **Reservation Controller** | Optimize unused slots | >> minutes | Assign PAYG VMs or re-place reservations | -### VM Lifecycle +#### VM Lifecycle VM allocations are tracked within reservations: @@ -87,19 +146,12 @@ flowchart 
LR end A[Nova Scheduler] -->|VM Create/Migrate/Resize| B[Scheduling Pipeline] B -->|update Spec.Allocations| Res - Res -->|watch| C[Controller] + Res -->|watch| C[Reservation Controller] HV -->|watch - instance changes| C Res -->|periodic safety-net requeue| C C -->|update Spec/Status.Allocations| Res ``` -| Component | Event | Timing | Action | -|-----------|-------|--------|--------| -| **Scheduling Pipeline** | VM Create, Migrate, Resize | Immediate | Add VM to `Spec.Allocations` | -| **Controller** | Reservation CRD updated | `committedResourceRequeueIntervalGracePeriod` (default: 1 min) | Defer verification for new VMs still spawning; update `Status.Allocations` | -| **Controller** | Hypervisor CRD updated (VM appeared/disappeared) | Immediate (event-driven) | Verify allocations via Hypervisor CRD; remove gone VMs from `Spec.Allocations` | -| **Controller** | Periodic safety-net | `committedResourceRequeueIntervalActive` (default: 5 min) | Same as above; catches any missed events | - **Allocation fields**: - `Spec.Allocations` — Expected VMs (written by the scheduling pipeline on placement) - `Status.Allocations` — Confirmed VMs (written by the controller after verifying the VM is on the expected host) @@ -124,7 +176,7 @@ stateDiagram-v2 **Note**: VM allocations may not consume all resources of a reservation slot. A reservation with 128 GB may have VMs totaling only 96 GB if that fits the project's needs. Allocations may exceed reservation capacity (e.g., after VM resize). -### Capacity Blocking +#### Capacity Blocking **Blocking rules by allocation state:** @@ -161,6 +213,19 @@ When a reservation is being migrated to a new host, block the full `max(Spec.Res - **VM live migration within a reservation** (VM moves away from the reservation's host): handled implicitly by `hv.Status.Allocation`. Libvirt reports resource consumption on both source and target during live migration, so both hosts' `hv.Status.Allocation` already reflects the in-flight state. 
No special filter logic needed. The reservation controller will eventually remove the VM from the reservation once it's confirmed on the wrong host past the grace period. +#### Reservation Controller + +The `Reservation` controller (`CommitmentReservationController`) watches `Reservation` CRDs and `Hypervisor` CRDs. `MaxConcurrentReconciles=1` prevents overbooking during concurrent placements. + +**Placement** — finds hosts for new reservations (calls scheduler API) + +**Allocation Verification** — tracks VM lifecycle on reservations. The controller uses the Hypervisor CRD as the sole source of truth, with two triggers: +- New VMs (within `committedResourceAllocationGracePeriod`, default: 15 min): verification deferred — VM may still be spawning; requeued every `committedResourceRequeueIntervalGracePeriod` (default: 1 min) +- Established VMs: verified reactively when the Hypervisor CRD changes (VM appeared or disappeared in `Status.Instances`), with `committedResourceRequeueIntervalActive` (default: 5 min) as a safety-net fallback +- Missing VMs: removed from `Spec.Allocations` when not found on the Hypervisor CRD after the grace period + +**Reservation migration is not supported yet.** + ### Change-Commitments API The change-commitments API receives batched commitment changes from Limes and manages reservations accordingly. @@ -176,19 +241,6 @@ The change-commitments API receives batched commitment changes from Limes and ma The syncer task runs periodically and syncs local Reservation CRD state to match Limes' view of commitments, correcting drift from missed API calls or restarts. -### Controller (Reconciliation) - -The controller watches Reservation CRDs and performs two types of reconciliation: - -**Placement** - Finds hosts for new reservations (calls scheduler API) - -**Allocation Verification** - Tracks VM lifecycle on reservations. 
The controller uses the Hypervisor CRD as the sole source of truth, with two triggers: -- New VMs (within `committedResourceAllocationGracePeriod`, default: 15 min): verification deferred — VM may still be spawning; requeued every `committedResourceRequeueIntervalGracePeriod` (default: 1 min) -- Established VMs: verified reactively when the Hypervisor CRD changes (VM appeared or disappeared in `Status.Instances`), with `committedResourceRequeueIntervalActive` (default: 5 min) as a safety-net fallback -- Missing VMs: removed from `Spec.Allocations` when not found on the Hypervisor CRD after the grace period - -**Reservation migration is not supported yet.** - ### Usage API For each flavor group `X` that accepts commitments, Cortex exposes three resource types: @@ -196,4 +248,4 @@ For each flavor group `X` that accepts commitments, Cortex exposes three resourc - `hw_version_X_cores` — CPU cores derived from RAM via fixed ratio (`HandlesCommitments=false`) - `hw_version_X_instances` — instance count (`HandlesCommitments=false`) -For each VM, the API reports whether it accounts to a specific commitment or PAYG. This assignment is deterministic and may differ from the actual Cortex internal assignment used for scheduling. \ No newline at end of file +For each VM, the API reports whether it accounts to a specific commitment or PAYG. This assignment is deterministic and may differ from the actual Cortex internal assignment used for scheduling. 
diff --git a/helm/library/cortex/files/crds/cortex.cloud_committedresources.yaml b/helm/library/cortex/files/crds/cortex.cloud_committedresources.yaml index 73cc8f9a2..092827edd 100644 --- a/helm/library/cortex/files/crds/cortex.cloud_committedresources.yaml +++ b/helm/library/cortex/files/crds/cortex.cloud_committedresources.yaml @@ -75,6 +75,18 @@ spec: spec: description: CommittedResourceSpec defines the desired state of CommittedResource, properties: + allowRejection: + description: |- + AllowRejection controls what the CommittedResource controller does when placement fails + for a guaranteed or confirmed commitment. + true — controller may reject: on failure, child Reservations are rolled back and the CR + is marked Rejected. Use this when the caller is making a first-time placement + decision and a "no" answer is acceptable (e.g. the change-commitments API). + false — controller must retry: on failure, existing child Reservations are kept and the + CR is set to Reserving so the controller retries later. Use this when the caller + is restoring already-committed state that Cortex must honour (e.g. the syncer). + Only meaningful for state=guaranteed or state=confirmed; ignored for all other states. 
+ type: boolean amount: anyOf: - type: integer diff --git a/internal/scheduling/reservations/commitments/committed_resource_controller.go b/internal/scheduling/reservations/commitments/committed_resource_controller.go new file mode 100644 index 000000000..a25d63e3a --- /dev/null +++ b/internal/scheduling/reservations/commitments/committed_resource_controller.go @@ -0,0 +1,279 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package commitments + +import ( + "context" + "fmt" + + "github.com/go-logr/logr" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations" + "github.com/cobaltcore-dev/cortex/pkg/multicluster" +) + +const crFinalizer = "committed-resource.reservations.cortex.cloud/cleanup" + +// CommittedResourceController reconciles CommittedResource CRDs and owns all child Reservation CRUD. 
+type CommittedResourceController struct { + client.Client + Scheme *runtime.Scheme + Conf Config +} + +func (r *CommittedResourceController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + var cr v1alpha1.CommittedResource + if err := r.Get(ctx, req.NamespacedName, &cr); err != nil { + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + ctx = WithNewGlobalRequestID(ctx) + logger := LoggerFromContext(ctx).WithValues( + "component", "committed-resource-controller", + "committedResource", req.Name, + ) + + if !cr.DeletionTimestamp.IsZero() { + return r.reconcileDeletion(ctx, logger, &cr) + } + + if !controllerutil.ContainsFinalizer(&cr, crFinalizer) { + controllerutil.AddFinalizer(&cr, crFinalizer) + if err := r.Update(ctx, &cr); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err) + } + return ctrl.Result{}, nil + } + + switch cr.Spec.State { + case v1alpha1.CommitmentStatusPlanned: + return ctrl.Result{}, r.setNotReady(ctx, &cr, "Planned", "commitment is not yet active") + case v1alpha1.CommitmentStatusPending: + return r.reconcilePending(ctx, logger, &cr) + case v1alpha1.CommitmentStatusGuaranteed, v1alpha1.CommitmentStatusConfirmed: + return r.reconcileCommitted(ctx, logger, &cr) + case v1alpha1.CommitmentStatusSuperseded, v1alpha1.CommitmentStatusExpired: + return r.reconcileInactive(ctx, logger, &cr) + default: + logger.Info("unknown commitment state, skipping", "state", cr.Spec.State) + return ctrl.Result{}, nil + } +} + +// reconcilePending handles a one-shot confirmation attempt (Limes state: pending). +// If placement fails for any reason, all partial reservations are removed and the +// CR is marked Rejected so the HTTP API can report the outcome back to Limes. 
+func (r *CommittedResourceController) reconcilePending(ctx context.Context, logger logr.Logger, cr *v1alpha1.CommittedResource) (ctrl.Result, error) { + if applyErr := r.applyReservationState(ctx, logger, cr); applyErr != nil { + logger.Error(applyErr, "pending commitment placement failed, rejecting") + if rollbackErr := r.deleteChildReservations(ctx, cr); rollbackErr != nil { + return ctrl.Result{}, rollbackErr + } + return ctrl.Result{}, r.setNotReady(ctx, cr, "Rejected", applyErr.Error()) + } + return ctrl.Result{}, r.setAccepted(ctx, cr) +} + +func (r *CommittedResourceController) reconcileCommitted(ctx context.Context, logger logr.Logger, cr *v1alpha1.CommittedResource) (ctrl.Result, error) { + // Spec errors are permanent regardless of AllowRejection — a bad spec won't fix itself. + if _, err := FromCommittedResource(*cr); err != nil { + logger.Error(err, "invalid commitment spec, rejecting") + return ctrl.Result{}, r.setNotReady(ctx, cr, "Rejected", err.Error()) + } + if applyErr := r.applyReservationState(ctx, logger, cr); applyErr != nil { + if cr.Spec.AllowRejection { + logger.Error(applyErr, "committed placement failed, rolling back to accepted amount") + if rollbackErr := r.rollbackToAccepted(ctx, logger, cr); rollbackErr != nil { + return ctrl.Result{}, rollbackErr + } + return ctrl.Result{}, r.setNotReady(ctx, cr, "Rejected", applyErr.Error()) + } + logger.Error(applyErr, "committed placement incomplete, will retry", "requeueAfter", r.Conf.RequeueIntervalRetry) + return ctrl.Result{RequeueAfter: r.Conf.RequeueIntervalRetry}, r.setNotReady(ctx, cr, "Reserving", applyErr.Error()) + } + return ctrl.Result{}, r.setAccepted(ctx, cr) +} + +func (r *CommittedResourceController) applyReservationState(ctx context.Context, logger logr.Logger, cr *v1alpha1.CommittedResource) error { + knowledge := &reservations.FlavorGroupKnowledgeClient{Client: r.Client} + flavorGroups, err := knowledge.GetAllFlavorGroups(ctx, nil) + if err != nil { + return fmt.Errorf("flavor 
knowledge not ready: %w", err) + } + + state, err := FromCommittedResource(*cr) + if err != nil { + return fmt.Errorf("invalid commitment spec: %w", err) + } + state.NamePrefix = cr.Name + "-" + state.CreatorRequestID = reservations.GlobalRequestIDFromContext(ctx) + + result, err := NewReservationManager(r.Client).ApplyCommitmentState(ctx, logger, state, flavorGroups, "committed-resource-controller") + if err != nil { + return err + } + logger.Info("commitment state applied", "created", result.Created, "deleted", result.Deleted, "repaired", result.Repaired) + return nil +} + +func (r *CommittedResourceController) setAccepted(ctx context.Context, cr *v1alpha1.CommittedResource) error { + now := metav1.Now() + old := cr.DeepCopy() + acceptedAmount := cr.Spec.Amount.DeepCopy() + cr.Status.AcceptedAmount = &acceptedAmount + cr.Status.AcceptedAt = &now + meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{ + Type: v1alpha1.CommittedResourceConditionReady, + Status: metav1.ConditionTrue, + Reason: "Accepted", + Message: "commitment successfully reserved", + LastTransitionTime: now, + }) + if err := r.Status().Patch(ctx, cr, client.MergeFrom(old)); err != nil { + return client.IgnoreNotFound(err) + } + return nil +} + +func (r *CommittedResourceController) reconcileInactive(ctx context.Context, logger logr.Logger, cr *v1alpha1.CommittedResource) (ctrl.Result, error) { + if err := r.deleteChildReservations(ctx, cr); err != nil { + return ctrl.Result{}, err + } + logger.Info("commitment inactive, child reservations removed", "state", cr.Spec.State) + return ctrl.Result{}, r.setNotReady(ctx, cr, string(cr.Spec.State), "commitment is no longer active") +} + +func (r *CommittedResourceController) reconcileDeletion(ctx context.Context, logger logr.Logger, cr *v1alpha1.CommittedResource) (ctrl.Result, error) { + if err := r.deleteChildReservations(ctx, cr); err != nil { + return ctrl.Result{}, err + } + controllerutil.RemoveFinalizer(cr, crFinalizer) + if err := 
r.Update(ctx, cr); err != nil { + return ctrl.Result{}, client.IgnoreNotFound(err) + } + logger.Info("committed resource deleted, child reservations cleaned up") + return ctrl.Result{}, nil +} + +// deleteChildReservations deletes all Reservation CRDs owned by this CommittedResource, +// identified by matching CommitmentUUID in the reservation spec. +func (r *CommittedResourceController) deleteChildReservations(ctx context.Context, cr *v1alpha1.CommittedResource) error { + var list v1alpha1.ReservationList + if err := r.List(ctx, &list, client.MatchingLabels{ + v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, + }); err != nil { + return fmt.Errorf("failed to list reservations: %w", err) + } + for i := range list.Items { + res := &list.Items[i] + if res.Spec.CommittedResourceReservation == nil || + res.Spec.CommittedResourceReservation.CommitmentUUID != cr.Spec.CommitmentUUID { + continue + } + if err := r.Delete(ctx, res); client.IgnoreNotFound(err) != nil { + return fmt.Errorf("failed to delete reservation %s: %w", res.Name, err) + } + } + return nil +} + +// rollbackToAccepted restores child Reservations to match Status.AcceptedAmount. +// If AcceptedAmount is nil (new CR that was never accepted), all child Reservations are deleted. +func (r *CommittedResourceController) rollbackToAccepted(ctx context.Context, logger logr.Logger, cr *v1alpha1.CommittedResource) error { + if cr.Status.AcceptedAmount == nil { + return r.deleteChildReservations(ctx, cr) + } + knowledge := &reservations.FlavorGroupKnowledgeClient{Client: r.Client} + flavorGroups, err := knowledge.GetAllFlavorGroups(ctx, nil) + if err != nil { + // Can't compute the rollback target — fall back to full delete rather than leaving + // a partial state that's inconsistent with the unknown AcceptedAmount. 
+ logger.Error(err, "flavor knowledge unavailable during rollback, deleting all child reservations") + return r.deleteChildReservations(ctx, cr) + } + state, err := FromCommittedResource(*cr) + if err != nil { + logger.Error(err, "invalid spec during rollback, deleting all child reservations") + return r.deleteChildReservations(ctx, cr) + } + state.TotalMemoryBytes = cr.Status.AcceptedAmount.Value() + state.NamePrefix = cr.Name + "-" + state.CreatorRequestID = reservations.GlobalRequestIDFromContext(ctx) + if _, err := NewReservationManager(r.Client).ApplyCommitmentState(ctx, logger, state, flavorGroups, "committed-resource-controller-rollback"); err != nil { + return fmt.Errorf("rollback apply failed: %w", err) + } + return nil +} + +// setNotReady patches Ready=False on CommittedResource status. +func (r *CommittedResourceController) setNotReady(ctx context.Context, cr *v1alpha1.CommittedResource, reason, message string) error { + old := cr.DeepCopy() + meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{ + Type: v1alpha1.CommittedResourceConditionReady, + Status: metav1.ConditionFalse, + Reason: reason, + Message: message, + LastTransitionTime: metav1.Now(), + }) + if err := r.Status().Patch(ctx, cr, client.MergeFrom(old)); err != nil { + return client.IgnoreNotFound(err) + } + return nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *CommittedResourceController) SetupWithManager(mgr ctrl.Manager, mcl *multicluster.Client) error { + ctx := context.Background() + if err := IndexFields(ctx, mcl); err != nil { + return fmt.Errorf("failed to set up field indexes: %w", err) + } + + bldr := multicluster.BuildController(mcl, mgr) + var err error + bldr, err = bldr.WatchesMulticluster( + &v1alpha1.CommittedResource{}, + &handler.EnqueueRequestForObject{}, + ) + if err != nil { + return err + } + // Re-enqueue the parent CommittedResource when a child Reservation changes (e.g. external deletion). 
+ bldr, err = bldr.WatchesMulticluster( + &v1alpha1.Reservation{}, + handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []ctrl.Request { + res, ok := obj.(*v1alpha1.Reservation) + if !ok || res.Spec.CommittedResourceReservation == nil { + return nil + } + uuid := res.Spec.CommittedResourceReservation.CommitmentUUID + var crList v1alpha1.CommittedResourceList + if err := r.List(ctx, &crList, client.MatchingFields{idxCommittedResourceByUUID: uuid}); err != nil { + LoggerFromContext(ctx).Error(err, "failed to list CommittedResources by UUID", "uuid", uuid) + return nil + } + if len(crList.Items) == 0 { + return nil + } + return []ctrl.Request{{NamespacedName: types.NamespacedName{Name: crList.Items[0].Name}}} + }), + ) + if err != nil { + return err + } + return bldr.Named("committed-resource"). + WithOptions(controller.Options{ + MaxConcurrentReconciles: 1, + }). + Complete(r) +} diff --git a/internal/scheduling/reservations/commitments/committed_resource_controller_test.go b/internal/scheduling/reservations/commitments/committed_resource_controller_test.go new file mode 100644 index 000000000..6e6103972 --- /dev/null +++ b/internal/scheduling/reservations/commitments/committed_resource_controller_test.go @@ -0,0 +1,441 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package commitments + +import ( + "context" + "encoding/json" + "testing" + "time" + + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" +) + +// ============================================================================ +// Helpers +// 
============================================================================ + +// newTestCommittedResource returns a CommittedResource with sensible defaults. +// The finalizer is pre-populated so tests can call Reconcile once without a +// separate finalizer-add round-trip. +func newTestCommittedResource(name string, state v1alpha1.CommitmentStatus) *v1alpha1.CommittedResource { + return &v1alpha1.CommittedResource{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Finalizers: []string{crFinalizer}, + }, + Spec: v1alpha1.CommittedResourceSpec{ + CommitmentUUID: "test-uuid-1234", + FlavorGroupName: "test-group", + ResourceType: v1alpha1.CommittedResourceTypeMemory, + Amount: resource.MustParse("4Gi"), + AvailabilityZone: "test-az", + ProjectID: "test-project", + DomainID: "test-domain", + State: state, + }, + } +} + +// newTestFlavorKnowledge returns a Knowledge CRD with a single 4 GiB flavor so +// a 4 GiB commitment produces exactly one slot. +func newTestFlavorKnowledge() *v1alpha1.Knowledge { + raw, err := json.Marshal(map[string]any{ + "features": []map[string]any{ + { + "name": "test-group", + "flavors": []map[string]any{ + { + "name": "test-flavor", + "memoryMB": 4096, + "vcpus": 2, + "extraSpecs": map[string]string{}, + }, + }, + }, + }, + }) + if err != nil { + panic(err) + } + return &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{Name: "flavor-groups"}, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Extractor: v1alpha1.KnowledgeExtractorSpec{Name: "flavor_groups"}, + }, + Status: v1alpha1.KnowledgeStatus{ + Raw: runtime.RawExtension{Raw: raw}, + RawLength: 1, + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: metav1.ConditionTrue, + Reason: "Ready", + }, + }, + }, + } +} + +func newCRTestScheme(t *testing.T) *runtime.Scheme { + t.Helper() + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add v1alpha1 scheme: %v", 
err) + } + if err := hv1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add hv1 scheme: %v", err) + } + return scheme +} + +func newCRTestClient(scheme *runtime.Scheme, objects ...client.Object) client.Client { + return fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). + WithStatusSubresource(&v1alpha1.CommittedResource{}, &v1alpha1.Reservation{}). + Build() +} + +func reconcileReq(name string) ctrl.Request { + return ctrl.Request{NamespacedName: types.NamespacedName{Name: name}} +} + +// assertCondition checks the Ready condition status and reason on a CommittedResource. +func assertCondition(t *testing.T, k8sClient client.Client, crName string, expectedStatus metav1.ConditionStatus, expectedReason string) { + t.Helper() + var cr v1alpha1.CommittedResource + if err := k8sClient.Get(context.Background(), types.NamespacedName{Name: crName}, &cr); err != nil { + t.Fatalf("failed to get CommittedResource %s: %v", crName, err) + } + cond := meta.FindStatusCondition(cr.Status.Conditions, v1alpha1.CommittedResourceConditionReady) + if cond == nil { + t.Errorf("Ready condition not set on %s", crName) + return + } + if cond.Status != expectedStatus { + t.Errorf("%s: expected Ready=%s, got %s", crName, expectedStatus, cond.Status) + } + if cond.Reason != expectedReason { + t.Errorf("%s: expected Reason=%s, got %s", crName, expectedReason, cond.Reason) + } +} + +// countChildReservations counts Reservation CRDs owned by the given CommitmentUUID, +// using the same identity predicate as the controller. 
+func countChildReservations(t *testing.T, k8sClient client.Client, commitmentUUID string) int { + t.Helper() + var list v1alpha1.ReservationList + if err := k8sClient.List(context.Background(), &list, client.MatchingLabels{ + v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, + }); err != nil { + t.Fatalf("failed to list reservations: %v", err) + } + count := 0 + for _, r := range list.Items { + if r.Spec.CommittedResourceReservation != nil && + r.Spec.CommittedResourceReservation.CommitmentUUID == commitmentUUID { + count++ + } + } + return count +} + +// ============================================================================ +// Tests: per-state reconcile paths +// ============================================================================ + +func TestCommittedResourceController_Reconcile(t *testing.T) { + tests := []struct { + name string + state v1alpha1.CommitmentStatus + expectedStatus metav1.ConditionStatus + expectedReason string + expectedSlots int + needsKnowledge bool + }{ + { + name: "planned: no Reservations created, Ready=False/Planned", + state: v1alpha1.CommitmentStatusPlanned, + expectedStatus: metav1.ConditionFalse, + expectedReason: "Planned", + expectedSlots: 0, + }, + { + name: "pending: Reservations created, Ready=True", + state: v1alpha1.CommitmentStatusPending, + expectedStatus: metav1.ConditionTrue, + expectedReason: "Accepted", + expectedSlots: 1, + needsKnowledge: true, + }, + { + name: "guaranteed: Reservations created, Ready=True", + state: v1alpha1.CommitmentStatusGuaranteed, + expectedStatus: metav1.ConditionTrue, + expectedReason: "Accepted", + expectedSlots: 1, + needsKnowledge: true, + }, + { + name: "confirmed: Reservations created, Ready=True", + state: v1alpha1.CommitmentStatusConfirmed, + expectedStatus: metav1.ConditionTrue, + expectedReason: "Accepted", + expectedSlots: 1, + needsKnowledge: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + scheme := 
newCRTestScheme(t) + cr := newTestCommittedResource("test-cr", tt.state) + objects := []client.Object{cr} + if tt.needsKnowledge { + objects = append(objects, newTestFlavorKnowledge()) + } + k8sClient := newCRTestClient(scheme, objects...) + controller := &CommittedResourceController{Client: k8sClient, Scheme: scheme, Conf: Config{}} + + if _, err := controller.Reconcile(context.Background(), reconcileReq(cr.Name)); err != nil { + t.Fatalf("reconcile: %v", err) + } + + assertCondition(t, k8sClient, cr.Name, tt.expectedStatus, tt.expectedReason) + if got := countChildReservations(t, k8sClient, cr.Spec.CommitmentUUID); got != tt.expectedSlots { + t.Errorf("expected %d child reservations, got %d", tt.expectedSlots, got) + } + + if tt.expectedSlots > 0 { + var updated v1alpha1.CommittedResource + if err := k8sClient.Get(context.Background(), types.NamespacedName{Name: cr.Name}, &updated); err != nil { + t.Fatalf("get CR: %v", err) + } + if updated.Status.AcceptedAmount == nil { + t.Errorf("expected AcceptedAmount to be set on acceptance") + } + } + }) + } +} + +func TestCommittedResourceController_InactiveStates(t *testing.T) { + tests := []struct { + name string + state v1alpha1.CommitmentStatus + }{ + {name: "superseded: child Reservations deleted, Ready=False", state: v1alpha1.CommitmentStatusSuperseded}, + {name: "expired: child Reservations deleted, Ready=False", state: v1alpha1.CommitmentStatusExpired}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + scheme := newCRTestScheme(t) + cr := newTestCommittedResource("test-cr", tt.state) + existing := &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cr-0", + Labels: map[string]string{ + v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, + }, + }, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + CommitmentUUID: "test-uuid-1234", + }, 
+ }, + } + k8sClient := newCRTestClient(scheme, cr, existing) + controller := &CommittedResourceController{Client: k8sClient, Scheme: scheme, Conf: Config{}} + + if _, err := controller.Reconcile(context.Background(), reconcileReq(cr.Name)); err != nil { + t.Fatalf("reconcile: %v", err) + } + + assertCondition(t, k8sClient, cr.Name, metav1.ConditionFalse, string(tt.state)) + if got := countChildReservations(t, k8sClient, cr.Spec.CommitmentUUID); got != 0 { + t.Errorf("expected 0 child reservations after %s, got %d", tt.state, got) + } + }) + } +} + +// ============================================================================ +// Tests: placement failure paths +// ============================================================================ + +func TestCommittedResourceController_PlacementFailure(t *testing.T) { + // Knowledge absent → placement fails. Tests diverging behavior by state and AllowRejection. + tests := []struct { + name string + state v1alpha1.CommitmentStatus + allowRejection bool + expectedReason string + expectRequeue bool + }{ + { + name: "pending: always rejects on failure, no retry", + state: v1alpha1.CommitmentStatusPending, + expectedReason: "Rejected", + expectRequeue: false, + }, + { + name: "guaranteed AllowRejection=true: rejects on failure, no retry", + state: v1alpha1.CommitmentStatusGuaranteed, + allowRejection: true, + expectedReason: "Rejected", + expectRequeue: false, + }, + { + name: "confirmed AllowRejection=true: rejects on failure, no retry", + state: v1alpha1.CommitmentStatusConfirmed, + allowRejection: true, + expectedReason: "Rejected", + expectRequeue: false, + }, + { + name: "guaranteed AllowRejection=false: retries on failure", + state: v1alpha1.CommitmentStatusGuaranteed, + allowRejection: false, + expectedReason: "Reserving", + expectRequeue: true, + }, + { + name: "confirmed AllowRejection=false: retries on failure", + state: v1alpha1.CommitmentStatusConfirmed, + allowRejection: false, + expectedReason: "Reserving", + 
expectRequeue: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + scheme := newCRTestScheme(t) + cr := newTestCommittedResource("test-cr", tt.state) + cr.Spec.AllowRejection = tt.allowRejection + k8sClient := newCRTestClient(scheme, cr) // no Knowledge → placement fails + controller := &CommittedResourceController{ + Client: k8sClient, + Scheme: scheme, + Conf: Config{RequeueIntervalRetry: 1 * time.Minute}, + } + + result, err := controller.Reconcile(context.Background(), reconcileReq(cr.Name)) + if err != nil { + t.Fatalf("reconcile: %v", err) + } + + assertCondition(t, k8sClient, cr.Name, metav1.ConditionFalse, tt.expectedReason) + if tt.expectRequeue && result.RequeueAfter == 0 { + t.Errorf("expected requeue after failure, got none") + } + if !tt.expectRequeue && result.RequeueAfter != 0 { + t.Errorf("expected no requeue after rejection, got RequeueAfter=%v", result.RequeueAfter) + } + if got := countChildReservations(t, k8sClient, cr.Spec.CommitmentUUID); got != 0 { + t.Errorf("expected 0 child reservations after failure, got %d", got) + } + }) + } +} + +func TestCommittedResourceController_BadSpec(t *testing.T) { + // Invalid UUID fails commitmentUUIDPattern — permanently broken regardless of AllowRejection. 
+	scheme := newCRTestScheme(t)
+	cr := &v1alpha1.CommittedResource{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:       "test-cr",
+			Finalizers: []string{crFinalizer},
+		},
+		Spec: v1alpha1.CommittedResourceSpec{
+			CommitmentUUID:   "x", // too short, fails commitmentUUIDPattern
+			FlavorGroupName:  "test-group",
+			ResourceType:     v1alpha1.CommittedResourceTypeMemory,
+			Amount:           resource.MustParse("4Gi"),
+			AvailabilityZone: "test-az",
+			ProjectID:        "test-project",
+			DomainID:         "test-domain",
+			State:            v1alpha1.CommitmentStatusConfirmed,
+		},
+	}
+	k8sClient := newCRTestClient(scheme, cr, newTestFlavorKnowledge())
+	controller := &CommittedResourceController{Client: k8sClient, Scheme: scheme, Conf: Config{}}
+
+	if _, err := controller.Reconcile(context.Background(), reconcileReq(cr.Name)); err != nil {
+		t.Fatalf("reconcile: %v", err)
+	}
+
+	assertCondition(t, k8sClient, cr.Name, metav1.ConditionFalse, "Rejected")
+	if got := countChildReservations(t, k8sClient, cr.Spec.CommitmentUUID); got != 0 {
+		t.Errorf("expected 0 child reservations after bad-spec rejection, got %d", got)
+	}
+}
+
+// TestCommittedResourceController_Idempotent verifies that repeated reconciles of the
+// same confirmed CR converge on exactly one child Reservation (no duplicates).
+func TestCommittedResourceController_Idempotent(t *testing.T) {
+	scheme := newCRTestScheme(t)
+	cr := newTestCommittedResource("test-cr", v1alpha1.CommitmentStatusConfirmed)
+	k8sClient := newCRTestClient(scheme, cr, newTestFlavorKnowledge())
+	controller := &CommittedResourceController{Client: k8sClient, Scheme: scheme, Conf: Config{}}
+
+	for i := range 3 {
+		if _, err := controller.Reconcile(context.Background(), reconcileReq(cr.Name)); err != nil {
+			t.Fatalf("reconcile %d: %v", i+1, err)
+		}
+	}
+
+	if got := countChildReservations(t, k8sClient, cr.Spec.CommitmentUUID); got != 1 {
+		t.Errorf("expected 1 child reservation after 3 reconciles (idempotency), got %d", got)
+	}
+	assertCondition(t, k8sClient, cr.Name, metav1.ConditionTrue, "Accepted")
+}
+
+// TestCommittedResourceController_Deletion verifies that deleting a CR removes its child
+// Reservations and that the controller releases the finalizer so the CR itself goes away.
+func TestCommittedResourceController_Deletion(t *testing.T) {
+	scheme := newCRTestScheme(t)
+	cr := newTestCommittedResource("test-cr", v1alpha1.CommitmentStatusConfirmed)
+	child := &v1alpha1.Reservation{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "test-cr-0",
+			Labels: map[string]string{
+				v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource,
+			},
+		},
+		Spec: v1alpha1.ReservationSpec{
+			Type: v1alpha1.ReservationTypeCommittedResource,
+			CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{
+				CommitmentUUID: "test-uuid-1234",
+			},
+		},
+	}
+	k8sClient := newCRTestClient(scheme, cr, child)
+	controller := &CommittedResourceController{Client: k8sClient, Scheme: scheme, Conf: Config{}}
+
+	// Delete only sets the DeletionTimestamp (the pre-populated finalizer keeps the object
+	// around); the reconcile below must clean up children and remove the finalizer.
+	if err := k8sClient.Delete(context.Background(), cr); err != nil {
+		t.Fatalf("delete CR: %v", err)
+	}
+	if _, err := controller.Reconcile(context.Background(), reconcileReq(cr.Name)); err != nil {
+		t.Fatalf("reconcile: %v", err)
+	}
+
+	if got := countChildReservations(t, k8sClient, cr.Spec.CommitmentUUID); got != 0 {
+		t.Errorf("expected 0 child reservations after deletion, got %d", got)
+	}
+	// Distinguish "object gone" (NotFound) from a real Get failure: a plain err == nil check
+	// would also pass on transport or API errors, silently masking a broken assertion. This
+	// mirrors the IgnoreNotFound pattern used by the integration deletion test.
+	var deleted v1alpha1.CommittedResource
+	err := k8sClient.Get(context.Background(), types.NamespacedName{Name: cr.Name}, &deleted)
+	if client.IgnoreNotFound(err) != nil {
+		t.Fatalf("unexpected error getting CR after deletion: %v", err)
+	}
+	if err == nil {
+		t.Errorf("expected CR to be gone after deletion, but it still exists with finalizers=%v", deleted.Finalizers)
+	}
+}
diff --git a/internal/scheduling/reservations/commitments/committed_resource_integration_test.go b/internal/scheduling/reservations/commitments/committed_resource_integration_test.go
new file mode 100644
index 000000000..01a0b4199
--- /dev/null
+++ b/internal/scheduling/reservations/commitments/committed_resource_integration_test.go
@@ -0,0 +1,331 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package commitments
+
+// Integration tests for the CR lifecycle spanning CommittedResourceController and
+// CommitmentReservationController. These tests drive both controllers against a shared
+// fake client and verify the end-to-end state transitions without mocking internal logic.
+// +// Scope: +// - State transition: planned → confirmed produces child Reservations +// - State transition: confirmed → expired cleans up child Reservations +// - Reservation controller places a child Reservation created by the CR controller +// - CR deletion removes all child Reservations + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + schedulerdelegationapi "github.com/cobaltcore-dev/cortex/api/external/nova" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" +) + +// crIntegrationEnv holds shared state for integration tests. +type crIntegrationEnv struct { + k8sClient client.Client + crController *CommittedResourceController + resController *CommitmentReservationController + schedulerServer *httptest.Server +} + +func newCRIntegrationEnv(t *testing.T) *crIntegrationEnv { + t.Helper() + scheme := newCRTestScheme(t) + + hypervisor := &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "host-1"}} + k8sClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(newTestFlavorKnowledge(), hypervisor). + WithStatusSubresource( + &v1alpha1.CommittedResource{}, + &v1alpha1.Reservation{}, + &v1alpha1.Knowledge{}, + ). 
+ Build() + + schedulerServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + resp := &schedulerdelegationapi.ExternalSchedulerResponse{Hosts: []string{"host-1"}} + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Errorf("scheduler encode: %v", err) + } + })) + + crCtrl := &CommittedResourceController{ + Client: k8sClient, + Scheme: scheme, + Conf: Config{RequeueIntervalRetry: 5 * time.Minute}, + } + + resCtrl := &CommitmentReservationController{ + Client: k8sClient, + Scheme: scheme, + Conf: Config{ + SchedulerURL: schedulerServer.URL, + AllocationGracePeriod: 15 * time.Minute, + RequeueIntervalActive: 5 * time.Minute, + }, + } + if err := resCtrl.Init(context.Background(), k8sClient, resCtrl.Conf); err != nil { + t.Fatalf("resCtrl.Init: %v", err) + } + + return &crIntegrationEnv{ + k8sClient: k8sClient, + crController: crCtrl, + resController: resCtrl, + schedulerServer: schedulerServer, + } +} + +func (e *crIntegrationEnv) close() { e.schedulerServer.Close() } + +func (e *crIntegrationEnv) reconcileCR(t *testing.T, crName string) { + t.Helper() + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: crName}} + if _, err := e.crController.Reconcile(context.Background(), req); err != nil { + t.Fatalf("CR reconcile: %v", err) + } +} + +func (e *crIntegrationEnv) reconcileReservation(t *testing.T, resName string) { + t.Helper() + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: resName}} + if _, err := e.resController.Reconcile(context.Background(), req); err != nil { + t.Fatalf("reservation reconcile %s: %v", resName, err) + } +} + +func (e *crIntegrationEnv) listChildReservations(t *testing.T, crName string) []v1alpha1.Reservation { + t.Helper() + var list v1alpha1.ReservationList + if err := e.k8sClient.List(context.Background(), &list, client.MatchingLabels{ + v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, + }); err != nil { + t.Fatalf("list reservations: 
%v", err) + } + prefix := crName + "-" + var children []v1alpha1.Reservation + for _, r := range list.Items { + if strings.HasPrefix(r.Name, prefix) { + children = append(children, r) + } + } + return children +} + +func (e *crIntegrationEnv) getCR(t *testing.T, name string) v1alpha1.CommittedResource { + t.Helper() + var cr v1alpha1.CommittedResource + if err := e.k8sClient.Get(context.Background(), types.NamespacedName{Name: name}, &cr); err != nil { + t.Fatalf("get CR %s: %v", name, err) + } + return cr +} + +// ============================================================================ +// Integration tests +// ============================================================================ + +// TestCRLifecycle_PlannedToConfirmed verifies that transitioning a CR from planned +// to confirmed causes the CR controller to create child Reservation CRDs. +func TestCRLifecycle_PlannedToConfirmed(t *testing.T) { + env := newCRIntegrationEnv(t) + defer env.close() + + cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusPlanned) + if err := env.k8sClient.Create(context.Background(), cr); err != nil { + t.Fatalf("create CR: %v", err) + } + + // Reconcile as planned: finalizer added, no Reservations. + env.reconcileCR(t, cr.Name) + env.reconcileCR(t, cr.Name) + if got := env.listChildReservations(t, cr.Name); len(got) != 0 { + t.Fatalf("planned: expected 0 reservations, got %d", len(got)) + } + crState := env.getCR(t, cr.Name) + cond := meta.FindStatusCondition(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) + if cond == nil || cond.Reason != "Planned" { + t.Errorf("planned: expected Reason=Planned, got %v", cond) + } + + // Transition to confirmed. 
+ patch := client.MergeFrom(crState.DeepCopy()) + crState.Spec.State = v1alpha1.CommitmentStatusConfirmed + if err := env.k8sClient.Patch(context.Background(), &crState, patch); err != nil { + t.Fatalf("patch state to confirmed: %v", err) + } + + env.reconcileCR(t, cr.Name) + + children := env.listChildReservations(t, cr.Name) + if len(children) != 1 { + t.Fatalf("confirmed: expected 1 reservation, got %d", len(children)) + } + crState = env.getCR(t, cr.Name) + if !meta.IsStatusConditionTrue(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) { + t.Errorf("confirmed: expected Ready=True") + } +} + +// TestCRLifecycle_ConfirmedToExpired verifies that transitioning a CR to expired +// deletes all child Reservation CRDs and marks Ready=False. +func TestCRLifecycle_ConfirmedToExpired(t *testing.T) { + env := newCRIntegrationEnv(t) + defer env.close() + + cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) + if err := env.k8sClient.Create(context.Background(), cr); err != nil { + t.Fatalf("create CR: %v", err) + } + + // Bring to confirmed+Ready=True. + env.reconcileCR(t, cr.Name) // adds finalizer + env.reconcileCR(t, cr.Name) // creates Reservations + + if got := env.listChildReservations(t, cr.Name); len(got) != 1 { + t.Fatalf("pre-expire: expected 1 reservation, got %d", len(got)) + } + + // Transition to expired. 
+ crState := env.getCR(t, cr.Name) + patch := client.MergeFrom(crState.DeepCopy()) + crState.Spec.State = v1alpha1.CommitmentStatusExpired + if err := env.k8sClient.Patch(context.Background(), &crState, patch); err != nil { + t.Fatalf("patch state to expired: %v", err) + } + + env.reconcileCR(t, cr.Name) + + if got := env.listChildReservations(t, cr.Name); len(got) != 0 { + t.Errorf("expired: expected 0 reservations, got %d", len(got)) + } + crState = env.getCR(t, cr.Name) + cond := meta.FindStatusCondition(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) + if cond == nil || cond.Status != metav1.ConditionFalse { + t.Errorf("expired: expected Ready=False, got %v", cond) + } + if cond != nil && cond.Reason != string(v1alpha1.CommitmentStatusExpired) { + t.Errorf("expired: expected Reason=%s, got %s", v1alpha1.CommitmentStatusExpired, cond.Reason) + } +} + +// TestCRLifecycle_ReservationControllerPlacesChild verifies that after the CR controller +// creates a child Reservation, the ReservationController can place it (scheduler call → +// TargetHost set → Ready=True on the Reservation). +func TestCRLifecycle_ReservationControllerPlacesChild(t *testing.T) { + env := newCRIntegrationEnv(t) + defer env.close() + + cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) + if err := env.k8sClient.Create(context.Background(), cr); err != nil { + t.Fatalf("create CR: %v", err) + } + + // CR controller creates child Reservation. + env.reconcileCR(t, cr.Name) + env.reconcileCR(t, cr.Name) + + children := env.listChildReservations(t, cr.Name) + if len(children) != 1 { + t.Fatalf("expected 1 child reservation, got %d", len(children)) + } + child := children[0] + + // Reservation controller places it (first reconcile: calls scheduler → sets TargetHost). 
+ env.reconcileReservation(t, child.Name) + + var afterFirst v1alpha1.Reservation + if err := env.k8sClient.Get(context.Background(), types.NamespacedName{Name: child.Name}, &afterFirst); err != nil { + t.Fatalf("get reservation after first reconcile: %v", err) + } + if afterFirst.Spec.TargetHost == "" { + t.Fatalf("expected TargetHost set after first reservation reconcile") + } + + // Second reconcile: syncs TargetHost to Status, sets Ready=True. + env.reconcileReservation(t, child.Name) + + var afterSecond v1alpha1.Reservation + if err := env.k8sClient.Get(context.Background(), types.NamespacedName{Name: child.Name}, &afterSecond); err != nil { + t.Fatalf("get reservation after second reconcile: %v", err) + } + if !meta.IsStatusConditionTrue(afterSecond.Status.Conditions, v1alpha1.ReservationConditionReady) { + t.Errorf("expected reservation Ready=True after placement, got %v", afterSecond.Status.Conditions) + } + if afterSecond.Status.Host != "host-1" { + t.Errorf("expected Status.Host=host-1, got %q", afterSecond.Status.Host) + } +} + +// TestCRLifecycle_Deletion verifies that deleting a CR cleans up all child Reservations. +func TestCRLifecycle_Deletion(t *testing.T) { + env := newCRIntegrationEnv(t) + defer env.close() + + cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) + if err := env.k8sClient.Create(context.Background(), cr); err != nil { + t.Fatalf("create CR: %v", err) + } + + // newTestCommittedResource pre-populates the finalizer, so Delete() will set + // DeletionTimestamp without needing a prior reconcile. + + // Pre-create a child Reservation to verify it gets cleaned up on deletion. 
+ child := &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-cr-0", + Labels: map[string]string{ + v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, + }, + }, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + CommitmentUUID: "test-uuid-1234", + }, + }, + } + if err := env.k8sClient.Create(context.Background(), child); err != nil { + t.Fatalf("create child reservation: %v", err) + } + + // Delete sets DeletionTimestamp (object has finalizer, so it is not removed yet). + crState := env.getCR(t, cr.Name) + if err := env.k8sClient.Delete(context.Background(), &crState); err != nil { + t.Fatalf("delete CR: %v", err) + } + + env.reconcileCR(t, cr.Name) + + if got := env.listChildReservations(t, cr.Name); len(got) != 0 { + t.Errorf("post-deletion: expected 0 reservations, got %d", len(got)) + } + // Finalizer removed — object either gone or has no finalizer. 
+ var final v1alpha1.CommittedResource + err := env.k8sClient.Get(context.Background(), types.NamespacedName{Name: cr.Name}, &final) + if client.IgnoreNotFound(err) != nil { + t.Fatalf("unexpected error after deletion: %v", err) + } + if err == nil { + for _, f := range final.Finalizers { + if f == crFinalizer { + t.Errorf("finalizer not removed after deletion reconcile") + } + } + } +} diff --git a/internal/scheduling/reservations/commitments/field_index.go b/internal/scheduling/reservations/commitments/field_index.go new file mode 100644 index 000000000..9e3fde378 --- /dev/null +++ b/internal/scheduling/reservations/commitments/field_index.go @@ -0,0 +1,43 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package commitments + +import ( + "context" + "errors" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/pkg/multicluster" + "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" +) + +const idxCommittedResourceByUUID = "spec.commitmentUUID" + +// IndexFields registers field indexes required by the CommittedResource controller. 
+func IndexFields(ctx context.Context, mcl *multicluster.Client) error { + log := logf.FromContext(ctx) + log.Info("Setting up field indexes for the CommittedResource controller") + if err := mcl.IndexField(ctx, + &v1alpha1.CommittedResource{}, + &v1alpha1.CommittedResourceList{}, + idxCommittedResourceByUUID, + func(obj client.Object) []string { + cr, ok := obj.(*v1alpha1.CommittedResource) + if !ok { + log.Error(errors.New("unexpected type"), "expected CommittedResource", "object", obj) + return nil + } + if cr.Spec.CommitmentUUID == "" { + return nil + } + return []string{cr.Spec.CommitmentUUID} + }, + ); err != nil { + log.Error(err, "failed to set up index for commitmentUUID") + return err + } + log.Info("Successfully set up index for commitmentUUID") + return nil +} diff --git a/internal/scheduling/reservations/commitments/controller.go b/internal/scheduling/reservations/commitments/reservation_controller.go similarity index 100% rename from internal/scheduling/reservations/commitments/controller.go rename to internal/scheduling/reservations/commitments/reservation_controller.go diff --git a/internal/scheduling/reservations/commitments/controller_test.go b/internal/scheduling/reservations/commitments/reservation_controller_test.go similarity index 75% rename from internal/scheduling/reservations/commitments/controller_test.go rename to internal/scheduling/reservations/commitments/reservation_controller_test.go index afb8ebcfc..7c0d63ee7 100644 --- a/internal/scheduling/reservations/commitments/controller_test.go +++ b/internal/scheduling/reservations/commitments/reservation_controller_test.go @@ -15,24 +15,16 @@ import ( "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" schedulerdelegationapi 
"github.com/cobaltcore-dev/cortex/api/external/nova" "github.com/cobaltcore-dev/cortex/api/v1alpha1" ) func TestCommitmentReservationController_Reconcile(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - if err := hv1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add hypervisor scheme: %v", err) - } + scheme := newCRTestScheme(t) tests := []struct { name string @@ -83,14 +75,10 @@ func TestCommitmentReservationController_Reconcile(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(tt.reservation). - WithStatusSubresource(&v1alpha1.Reservation{}). - Build() + k8sClient := newCRTestClient(scheme, tt.reservation) reconciler := &CommitmentReservationController{ - Client: client, + Client: k8sClient, Scheme: scheme, Conf: Config{ RequeueIntervalActive: 5 * time.Minute, @@ -118,9 +106,8 @@ func TestCommitmentReservationController_Reconcile(t *testing.T) { t.Errorf("Expected no requeue but got %v", result.RequeueAfter) } - // Verify the reservation status var updated v1alpha1.Reservation - err = client.Get(context.Background(), req.NamespacedName, &updated) + err = k8sClient.Get(context.Background(), req.NamespacedName, &updated) if err != nil { t.Errorf("Failed to get updated reservation: %v", err) return @@ -146,23 +133,18 @@ func TestCommitmentReservationController_Reconcile(t *testing.T) { // ============================================================================ func TestReconcileAllocations_HypervisorCRDPath(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - if err := hv1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add hypervisor scheme: %v", err) - } + scheme := newCRTestScheme(t) now := time.Now() recentTime := metav1.NewTime(now.Add(-5 
* time.Minute)) // 5 minutes ago (within grace period) oldTime := metav1.NewTime(now.Add(-30 * time.Minute)) // 30 minutes ago (past grace period) + config := Config{AllocationGracePeriod: 15 * time.Minute} + tests := []struct { name string reservation *v1alpha1.Reservation hypervisor *hv1.Hypervisor - config Config expectedStatusAllocations map[string]string expectedSpecAllocations []string // VM UUIDs expected to remain in spec; nil means no check expectedHasGracePeriodAllocs bool @@ -175,7 +157,6 @@ func TestReconcileAllocations_HypervisorCRDPath(t *testing.T) { hypervisor: newTestHypervisorCRD("host-1", []hv1.Instance{ {ID: "vm-1", Name: "vm-1", Active: true}, }), - config: Config{AllocationGracePeriod: 15 * time.Minute}, expectedStatusAllocations: map[string]string{"vm-1": "host-1"}, expectedSpecAllocations: []string{"vm-1"}, expectedHasGracePeriodAllocs: false, @@ -186,9 +167,8 @@ func TestReconcileAllocations_HypervisorCRDPath(t *testing.T) { "vm-stopped": oldTime, }), hypervisor: newTestHypervisorCRD("host-1", []hv1.Instance{ - {ID: "vm-stopped", Name: "vm-stopped", Active: false}, // Inactive VM should still be found + {ID: "vm-stopped", Name: "vm-stopped", Active: false}, }), - config: Config{AllocationGracePeriod: 15 * time.Minute}, expectedStatusAllocations: map[string]string{"vm-stopped": "host-1"}, expectedSpecAllocations: []string{"vm-stopped"}, expectedHasGracePeriodAllocs: false, @@ -198,10 +178,9 @@ func TestReconcileAllocations_HypervisorCRDPath(t *testing.T) { reservation: newTestCRReservation(map[string]metav1.Time{ "vm-1": oldTime, }), - hypervisor: newTestHypervisorCRD("host-1", []hv1.Instance{}), // Empty - config: Config{AllocationGracePeriod: 15 * time.Minute}, + hypervisor: newTestHypervisorCRD("host-1", []hv1.Instance{}), expectedStatusAllocations: map[string]string{}, - expectedSpecAllocations: []string{}, // Removed from spec + expectedSpecAllocations: []string{}, expectedHasGracePeriodAllocs: false, }, { @@ -209,31 +188,26 @@ func 
TestReconcileAllocations_HypervisorCRDPath(t *testing.T) { reservation: newTestCRReservation(map[string]metav1.Time{ "vm-1": recentTime, }), - hypervisor: nil, - config: Config{AllocationGracePeriod: 15 * time.Minute}, expectedStatusAllocations: map[string]string{}, - expectedSpecAllocations: []string{"vm-1"}, // Kept in spec during grace period + expectedSpecAllocations: []string{"vm-1"}, expectedHasGracePeriodAllocs: true, }, { name: "mixed allocations - old verified via CRD, new in grace period", reservation: newTestCRReservation(map[string]metav1.Time{ - "vm-new": recentTime, // In grace period - "vm-old": oldTime, // Past grace period + "vm-new": recentTime, + "vm-old": oldTime, }), hypervisor: newTestHypervisorCRD("host-1", []hv1.Instance{ {ID: "vm-old", Name: "vm-old", Active: true}, }), - config: Config{AllocationGracePeriod: 15 * time.Minute}, - expectedStatusAllocations: map[string]string{"vm-old": "host-1"}, // Only old one confirmed via CRD + expectedStatusAllocations: map[string]string{"vm-old": "host-1"}, expectedSpecAllocations: []string{"vm-new", "vm-old"}, expectedHasGracePeriodAllocs: true, }, { name: "empty allocations - no work to do", reservation: newTestCRReservation(map[string]metav1.Time{}), - hypervisor: nil, - config: Config{AllocationGracePeriod: 15 * time.Minute}, expectedStatusAllocations: map[string]string{}, expectedHasGracePeriodAllocs: false, }, @@ -242,10 +216,8 @@ func TestReconcileAllocations_HypervisorCRDPath(t *testing.T) { reservation: newTestCRReservation(map[string]metav1.Time{ "vm-1": oldTime, }), - hypervisor: nil, // HV CRD does not exist (e.g. 
host deleted) - config: Config{AllocationGracePeriod: 15 * time.Minute}, expectedStatusAllocations: map[string]string{}, - expectedSpecAllocations: []string{}, // Removed from spec + expectedSpecAllocations: []string{}, expectedHasGracePeriodAllocs: false, }, { @@ -253,32 +225,25 @@ func TestReconcileAllocations_HypervisorCRDPath(t *testing.T) { reservation: newTestCRReservation(map[string]metav1.Time{ "vm-1": recentTime, }), - hypervisor: nil, // HV CRD does not exist - config: Config{AllocationGracePeriod: 15 * time.Minute}, expectedStatusAllocations: map[string]string{}, - expectedSpecAllocations: []string{"vm-1"}, // Kept during grace period + expectedSpecAllocations: []string{"vm-1"}, expectedHasGracePeriodAllocs: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Build fake client with objects objects := []client.Object{tt.reservation} if tt.hypervisor != nil { objects = append(objects, tt.hypervisor) } - k8sClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Reservation{}). - Build() + k8sClient := newCRTestClient(scheme, objects...) controller := &CommitmentReservationController{ Client: k8sClient, Scheme: scheme, - Conf: tt.config, + Conf: config, } ctx := WithNewGlobalRequestID(context.Background()) @@ -406,10 +371,7 @@ func newTestHypervisorCRD(name string, instances []hv1.Instance) *hv1.Hypervisor // This covers the mapper logic; the watch wiring itself (informer → mapper → enqueue) // is controller-runtime's responsibility and is not unit-testable without envtest. 
func TestHypervisorToReservations(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("failed to add scheme: %v", err) - } + scheme := newCRTestScheme(t) res1 := &v1alpha1.Reservation{ ObjectMeta: metav1.ObjectMeta{Name: "res-host-1"}, @@ -436,11 +398,7 @@ func TestHypervisorToReservations(t *testing.T) { Status: v1alpha1.ReservationStatus{Host: "host-1"}, } - k8sClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(res1, res2, resOtherHost, resNoHost, resFailover). - WithStatusSubresource(&v1alpha1.Reservation{}). - Build() + k8sClient := newCRTestClient(scheme, res1, res2, resOtherHost, resNoHost, resFailover) controller := &CommitmentReservationController{Client: k8sClient} @@ -467,13 +425,7 @@ func TestHypervisorToReservations(t *testing.T) { // ============================================================================ func TestCommitmentReservationController_reconcileInstanceReservation_Success(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - if err := hv1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add hypervisor scheme: %v", err) - } + scheme := newCRTestScheme(t) reservation := &v1alpha1.Reservation{ ObjectMeta: ctrl.ObjectMeta{ @@ -486,91 +438,16 @@ func TestCommitmentReservationController_reconcileInstanceReservation_Success(t ResourceName: "test-flavor", }, Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: resource.MustParse("1Gi"), + hv1.ResourceMemory: resource.MustParse("4Gi"), hv1.ResourceCPU: resource.MustParse("2"), }, }, } - // Create flavor group knowledge CRD for the test - flavorGroups := []struct { - Name string `json:"name"` - Flavors []struct { - Name string `json:"name"` - MemoryMB uint64 `json:"memoryMB"` - VCPUs uint64 `json:"vcpus"` - ExtraSpecs map[string]string `json:"extraSpecs"` - } `json:"flavors"` - }{ - { - Name: 
"test-group", - Flavors: []struct { - Name string `json:"name"` - MemoryMB uint64 `json:"memoryMB"` - VCPUs uint64 `json:"vcpus"` - ExtraSpecs map[string]string `json:"extraSpecs"` - }{ - { - Name: "test-flavor", - MemoryMB: 1024, - VCPUs: 2, - ExtraSpecs: map[string]string{}, - }, - }, - }, - } - - // Marshal flavor groups into runtime.RawExtension - flavorGroupsJSON, err := json.Marshal(map[string]interface{}{ - "features": flavorGroups, - }) - if err != nil { - t.Fatalf("Failed to marshal flavor groups: %v", err) - } - - flavorGroupKnowledge := &v1alpha1.Knowledge{ - ObjectMeta: metav1.ObjectMeta{ - Name: "flavor-groups", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Extractor: v1alpha1.KnowledgeExtractorSpec{ - Name: "flavor_groups", - }, - Recency: metav1.Duration{Duration: 0}, - }, - Status: v1alpha1.KnowledgeStatus{ - Raw: runtime.RawExtension{Raw: flavorGroupsJSON}, - RawLength: 1, - Conditions: []metav1.Condition{ - { - Type: v1alpha1.KnowledgeConditionReady, - Status: metav1.ConditionTrue, - Reason: "TestReady", - }, - }, - }, - } - - // Create mock hypervisors - hypervisor1 := &hv1.Hypervisor{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-host-1", - }, - Spec: hv1.HypervisorSpec{}, - } - hypervisor2 := &hv1.Hypervisor{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-host-2", - }, - Spec: hv1.HypervisorSpec{}, - } + hypervisor1 := &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "test-host-1"}} + hypervisor2 := &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "test-host-2"}} - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(reservation, flavorGroupKnowledge, hypervisor1, hypervisor2). - WithStatusSubresource(&v1alpha1.Reservation{}, &v1alpha1.Knowledge{}). 
- Build() + k8sClient := newCRTestClient(scheme, reservation, newTestFlavorKnowledge(), hypervisor1, hypervisor2) // Create a mock server that returns a successful response mockResponse := &schedulerdelegationapi.ExternalSchedulerResponse{ @@ -602,13 +479,13 @@ func TestCommitmentReservationController_reconcileInstanceReservation_Success(t } reconciler := &CommitmentReservationController{ - Client: client, + Client: k8sClient, Scheme: scheme, Conf: config, } // Initialize the reconciler (this sets up SchedulerClient) - if err := reconciler.Init(context.Background(), client, config); err != nil { + if err := reconciler.Init(context.Background(), k8sClient, config); err != nil { t.Fatalf("Failed to initialize reconciler: %v", err) } @@ -630,7 +507,7 @@ func TestCommitmentReservationController_reconcileInstanceReservation_Success(t // Verify Spec.TargetHost is set after first reconcile var afterFirstReconcile v1alpha1.Reservation - if err = client.Get(context.Background(), req.NamespacedName, &afterFirstReconcile); err != nil { + if err = k8sClient.Get(context.Background(), req.NamespacedName, &afterFirstReconcile); err != nil { t.Errorf("Failed to get reservation after first reconcile: %v", err) return } @@ -650,7 +527,7 @@ func TestCommitmentReservationController_reconcileInstanceReservation_Success(t // Verify the reservation status after second reconcile var updated v1alpha1.Reservation - if err = client.Get(context.Background(), req.NamespacedName, &updated); err != nil { + if err = k8sClient.Get(context.Background(), req.NamespacedName, &updated); err != nil { t.Errorf("Failed to get updated reservation: %v", err) return } diff --git a/internal/scheduling/reservations/commitments/reservation_manager.go b/internal/scheduling/reservations/commitments/reservation_manager.go index 0cdbc9f12..d7a75cc7a 100644 --- a/internal/scheduling/reservations/commitments/reservation_manager.go +++ b/internal/scheduling/reservations/commitments/reservation_manager.go @@ -77,11 
+77,11 @@ func (m *ReservationManager) ApplyCommitmentState( return nil, fmt.Errorf("failed to list reservations: %w", err) } - // Filter by name prefix to find reservations for this commitment - namePrefix := fmt.Sprintf("commitment-%s-", desiredState.CommitmentUUID) + // Filter by CommitmentUUID to find reservations for this commitment var existing []v1alpha1.Reservation for _, res := range allReservations.Items { - if len(res.Name) >= len(namePrefix) && res.Name[:len(namePrefix)] == namePrefix { + if res.Spec.CommittedResourceReservation != nil && + res.Spec.CommittedResourceReservation.CommitmentUUID == desiredState.CommitmentUUID { existing = append(existing, res) } } @@ -266,7 +266,11 @@ func (m *ReservationManager) newReservation( creator string, ) *v1alpha1.Reservation { - name := fmt.Sprintf("commitment-%s-%d", state.CommitmentUUID, slotIndex) + namePrefix := state.NamePrefix + if namePrefix == "" { + namePrefix = fmt.Sprintf("commitment-%s-", state.CommitmentUUID) + } + name := fmt.Sprintf("%s%d", namePrefix, slotIndex) // Select first flavor that fits remaining memory (flavors sorted descending by size) flavorInGroup := flavorGroup.Flavors[len(flavorGroup.Flavors)-1] // default to smallest diff --git a/internal/scheduling/reservations/commitments/reservation_manager_test.go b/internal/scheduling/reservations/commitments/reservation_manager_test.go index 7733cb6c2..b512fc9b5 100644 --- a/internal/scheduling/reservations/commitments/reservation_manager_test.go +++ b/internal/scheduling/reservations/commitments/reservation_manager_test.go @@ -13,691 +13,276 @@ import ( "github.com/go-logr/logr" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -func TestApplyCommitmentState_CreatesNewReservations(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != 
nil { - t.Fatal(err) +// newTestCRSlot creates a Reservation slot for commitment "abc123" / project "project-1". +// Pass nil allocs for an empty allocation map. +func newTestCRSlot(name string, memGiB int64, targetHost, resourceGroup string, allocs map[string]v1alpha1.CommittedResourceAllocation) v1alpha1.Reservation { + if allocs == nil { + allocs = map[string]v1alpha1.CommittedResourceAllocation{} } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - Build() - - manager := NewReservationManager(client) - flavorGroup := testFlavorGroup() - flavorGroups := map[string]compute.FlavorGroupFeature{ - "test-group": flavorGroup, - } - - // Desired state: 3 multiples of smallest flavor (24 GiB) - desiredState := &CommitmentState{ - CommitmentUUID: "abc123", - ProjectID: "project-1", - FlavorGroupName: "test-group", - TotalMemoryBytes: 3 * 8192 * 1024 * 1024, - } - - applyResult, err := manager.ApplyCommitmentState( - context.Background(), - logr.Discard(), - desiredState, - flavorGroups, - "syncer", - ) - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if len(applyResult.RemovedReservations) != 0 { - t.Errorf("expected 0 applyResult.RemovedReservations reservations, got %d", len(applyResult.RemovedReservations)) - } - - // Should create reservations to fulfill the commitment - if len(applyResult.TouchedReservations) == 0 { - t.Fatal("expected at least one reservation to be created") - } - - // Verify created reservations sum to desired state - totalMemory := int64(0) - for _, res := range applyResult.TouchedReservations { - memQuantity := res.Spec.Resources[hv1.ResourceMemory] - totalMemory += memQuantity.Value() + return v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{ + v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, + }, + }, + Spec: v1alpha1.ReservationSpec{ + TargetHost: targetHost, + Resources: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceMemory: 
*resource.NewQuantity(memGiB*1024*1024*1024, resource.BinarySI), + }, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + CommitmentUUID: "abc123", + ProjectID: "project-1", + ResourceGroup: resourceGroup, + Creator: "syncer", + Allocations: allocs, + }, + }, } +} - if totalMemory != desiredState.TotalMemoryBytes { - t.Errorf("expected total memory %d, got %d", desiredState.TotalMemoryBytes, totalMemory) - } +// testFlavorGroups returns the default flavor groups map used across tests. +func testFlavorGroups() map[string]compute.FlavorGroupFeature { + return map[string]compute.FlavorGroupFeature{"test-group": testFlavorGroup()} } -func TestApplyCommitmentState_DeletesExcessReservations(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatal(err) - } +// ============================================================================ +// Tests: ApplyCommitmentState +// ============================================================================ - // Create existing reservations (32 GiB total) - existingReservations := []v1alpha1.Reservation{ +func TestApplyCommitmentState(t *testing.T) { + tests := []struct { + name string + existingSlots []v1alpha1.Reservation + desiredMemoryGiB int64 + flavorGroupOverride map[string]compute.FlavorGroupFeature // nil = testFlavorGroups() + wantError bool + wantRemovedCount int // exact count; -1 = at least one + validateRemoved func(t *testing.T, removed []v1alpha1.Reservation) + validateTouched func(t *testing.T, touched []v1alpha1.Reservation) + validateRemaining func(t *testing.T, remaining []v1alpha1.Reservation) + }{ { - ObjectMeta: metav1.ObjectMeta{ - Name: "commitment-abc123-0", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, + name: "creates reservations to match desired memory", + desiredMemoryGiB: 24, // 3 × 8 GiB slots + validateTouched: func(t *testing.T, touched 
[]v1alpha1.Reservation) { + if len(touched) == 0 { + t.Fatal("expected at least one reservation created") + } + var total int64 + for _, r := range touched { + q := r.Spec.Resources[hv1.ResourceMemory] + total += q.Value() + } + if want := int64(24 * 1024 * 1024 * 1024); total != want { + t.Errorf("expected total memory %d, got %d", want, total) + } }, - Spec: v1alpha1.ReservationSpec{ - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: *resource.NewQuantity(16*1024*1024*1024, resource.BinarySI), - }, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "project-1", - ResourceGroup: "test-group", - Creator: "syncer", - Allocations: map[string]v1alpha1.CommittedResourceAllocation{}, - }, + }, + { + // Algorithm removes both 16 GiB slots and creates a new 8 GiB one. + name: "removes excess reservations, remaining memory matches desired", + existingSlots: []v1alpha1.Reservation{ + newTestCRSlot("commitment-abc123-0", 16, "", "test-group", nil), + newTestCRSlot("commitment-abc123-1", 16, "", "test-group", nil), + }, + desiredMemoryGiB: 8, + wantRemovedCount: -1, + validateRemaining: func(t *testing.T, remaining []v1alpha1.Reservation) { + var total int64 + for _, r := range remaining { + q := r.Spec.Resources[hv1.ResourceMemory] + total += q.Value() + } + if want := int64(8 * 1024 * 1024 * 1024); total != want { + t.Errorf("expected remaining memory %d, got %d", want, total) + } }, }, { - ObjectMeta: metav1.ObjectMeta{ - Name: "commitment-abc123-1", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, + name: "zero desired memory removes all reservations", + existingSlots: []v1alpha1.Reservation{ + newTestCRSlot("commitment-abc123-0", 8, "", "test-group", nil), }, - Spec: v1alpha1.ReservationSpec{ - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: *resource.NewQuantity(16*1024*1024*1024, resource.BinarySI), - }, - 
CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "project-1", - ResourceGroup: "test-group", - Creator: "syncer", - Allocations: map[string]v1alpha1.CommittedResourceAllocation{}, - }, + desiredMemoryGiB: 0, + wantRemovedCount: 1, + validateRemaining: func(t *testing.T, remaining []v1alpha1.Reservation) { + if len(remaining) != 0 { + t.Errorf("expected 0 remaining, got %d", len(remaining)) + } }, }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(&existingReservations[0], &existingReservations[1]). - Build() - - manager := NewReservationManager(client) - flavorGroup := testFlavorGroup() - flavorGroups := map[string]compute.FlavorGroupFeature{ - "test-group": flavorGroup, - } - - // Desired state: only 8 GiB (need to reduce) - desiredState := &CommitmentState{ - CommitmentUUID: "abc123", - ProjectID: "project-1", - FlavorGroupName: "test-group", - TotalMemoryBytes: 8 * 1024 * 1024 * 1024, - } - - applyResult, err := manager.ApplyCommitmentState( - context.Background(), - logr.Discard(), - desiredState, - flavorGroups, - "syncer", - ) - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - // Note: May create a new 8GiB reservation while removing the two 16GiB ones - // This is expected behavior based on the slot sizing algorithm - - // Should remove excess reservations - if len(applyResult.RemovedReservations) == 0 { - t.Fatal("expected reservations to be removed") - } - - // Verify remaining capacity matches desired state - var remainingList v1alpha1.ReservationList - if err := client.List(context.Background(), &remainingList); err != nil { - t.Fatal(err) - } - - totalMemory := int64(0) - for _, res := range remainingList.Items { - memQuantity := res.Spec.Resources[hv1.ResourceMemory] - totalMemory += memQuantity.Value() - } - - if totalMemory != desiredState.TotalMemoryBytes { - t.Errorf("expected remaining memory %d, got %d", desiredState.TotalMemoryBytes, totalMemory) - } -} - -func 
TestApplyCommitmentState_DeletionPriority(t *testing.T) { - tests := []struct { - name string - existingReservations []v1alpha1.Reservation - desiredMemoryBytes int64 - expectedRemovedCount int - validateRemoved func(t *testing.T, removed []v1alpha1.Reservation) - validateRemaining func(t *testing.T, remaining []v1alpha1.Reservation) - }{ { - name: "Priority 1: Unscheduled reservations (no TargetHost) deleted first", - existingReservations: []v1alpha1.Reservation{ - // Reservation 0: Has TargetHost and allocations - lowest priority (should remain) - { - ObjectMeta: metav1.ObjectMeta{ - Name: "commitment-abc123-0", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, - }, - Spec: v1alpha1.ReservationSpec{ - TargetHost: "host-1", - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: *resource.NewQuantity(8*1024*1024*1024, resource.BinarySI), - }, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "project-1", - ResourceGroup: "test-group", - Creator: "syncer", - Allocations: map[string]v1alpha1.CommittedResourceAllocation{ - "vm-123": {}, - }, - }, - }, - }, - // Reservation 1: No TargetHost and no allocations - highest priority (should be deleted) - { - ObjectMeta: metav1.ObjectMeta{ - Name: "commitment-abc123-1", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, - }, - Spec: v1alpha1.ReservationSpec{ - TargetHost: "", - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: *resource.NewQuantity(8*1024*1024*1024, resource.BinarySI), - }, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "project-1", - ResourceGroup: "test-group", - Creator: "syncer", - Allocations: map[string]v1alpha1.CommittedResourceAllocation{}, - }, - }, - }, + name: "replaces reservation with wrong flavor group", + existingSlots: []v1alpha1.Reservation{ + 
newTestCRSlot("commitment-abc123-0", 8, "", "wrong-group", nil), + }, + desiredMemoryGiB: 8, + wantRemovedCount: 1, + validateTouched: func(t *testing.T, touched []v1alpha1.Reservation) { + if len(touched) != 1 { + t.Fatalf("expected 1 new reservation, got %d", len(touched)) + } + if got := touched[0].Spec.CommittedResourceReservation.ResourceGroup; got != "test-group" { + t.Errorf("expected flavor group test-group, got %s", got) + } }, - desiredMemoryBytes: 8 * 1024 * 1024 * 1024, // Need to delete one - expectedRemovedCount: 1, + }, + { + name: "unknown flavor group returns error", + desiredMemoryGiB: 8, + flavorGroupOverride: map[string]compute.FlavorGroupFeature{}, + wantError: true, + }, + { + name: "deletion priority: unscheduled (no TargetHost) deleted before scheduled", + existingSlots: []v1alpha1.Reservation{ + newTestCRSlot("commitment-abc123-0", 8, "host-1", "test-group", map[string]v1alpha1.CommittedResourceAllocation{"vm-123": {}}), + newTestCRSlot("commitment-abc123-1", 8, "", "test-group", nil), + }, + desiredMemoryGiB: 8, + wantRemovedCount: 1, validateRemoved: func(t *testing.T, removed []v1alpha1.Reservation) { - // Should have removed the unscheduled one (no TargetHost) if removed[0].Spec.TargetHost != "" { - t.Errorf("expected unscheduled reservation to be removed, but removed %s with TargetHost %s", - removed[0].Name, removed[0].Spec.TargetHost) + t.Errorf("expected unscheduled reservation removed, got TargetHost=%q", removed[0].Spec.TargetHost) } }, validateRemaining: func(t *testing.T, remaining []v1alpha1.Reservation) { if len(remaining) != 1 { - t.Fatalf("expected 1 remaining reservation, got %d", len(remaining)) + t.Fatalf("expected 1 remaining, got %d", len(remaining)) } - // Should have kept the scheduled one with allocations - if remaining[0].Spec.TargetHost == "" { - t.Error("expected scheduled reservation to remain") - } - if len(remaining[0].Spec.CommittedResourceReservation.Allocations) == 0 { - t.Error("expected reservation with 
allocations to remain") + if remaining[0].Spec.TargetHost == "" || len(remaining[0].Spec.CommittedResourceReservation.Allocations) == 0 { + t.Error("expected scheduled reservation with allocations to remain") } }, }, { - name: "Priority 2: Unused scheduled reservations (no allocations) deleted next", - existingReservations: []v1alpha1.Reservation{ - // Has TargetHost AND allocations - lowest priority for deletion - { - ObjectMeta: metav1.ObjectMeta{ - Name: "commitment-abc123-0", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, - }, - Spec: v1alpha1.ReservationSpec{ - TargetHost: "host-1", - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: *resource.NewQuantity(8*1024*1024*1024, resource.BinarySI), - }, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "project-1", - ResourceGroup: "test-group", - Creator: "syncer", - Allocations: map[string]v1alpha1.CommittedResourceAllocation{ - "vm-123": {}, - }, - }, - }, - }, - // Has TargetHost but NO allocations - medium priority - { - ObjectMeta: metav1.ObjectMeta{ - Name: "commitment-abc123-1", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, - }, - Spec: v1alpha1.ReservationSpec{ - TargetHost: "host-2", - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: *resource.NewQuantity(8*1024*1024*1024, resource.BinarySI), - }, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "project-1", - ResourceGroup: "test-group", - Creator: "syncer", - Allocations: map[string]v1alpha1.CommittedResourceAllocation{}, - }, - }, - }, + name: "deletion priority: unused scheduled (no allocations) deleted before allocated", + existingSlots: []v1alpha1.Reservation{ + newTestCRSlot("commitment-abc123-0", 8, "host-1", "test-group", map[string]v1alpha1.CommittedResourceAllocation{"vm-123": {}}), + 
newTestCRSlot("commitment-abc123-1", 8, "host-2", "test-group", nil), }, - desiredMemoryBytes: 8 * 1024 * 1024 * 1024, - expectedRemovedCount: 1, + desiredMemoryGiB: 8, + wantRemovedCount: 1, validateRemoved: func(t *testing.T, removed []v1alpha1.Reservation) { - // Should have removed the one without allocations if len(removed[0].Spec.CommittedResourceReservation.Allocations) != 0 { t.Error("expected reservation without allocations to be removed") } }, validateRemaining: func(t *testing.T, remaining []v1alpha1.Reservation) { if len(remaining) != 1 { - t.Fatalf("expected 1 remaining reservation, got %d", len(remaining)) + t.Fatalf("expected 1 remaining, got %d", len(remaining)) } - // Should have kept the one with allocations if len(remaining[0].Spec.CommittedResourceReservation.Allocations) == 0 { t.Error("expected reservation with allocations to remain") } }, }, { - name: "Mixed scenario: comprehensive deletion priority test", - existingReservations: []v1alpha1.Reservation{ - // Reservation 0: Has TargetHost + has allocations (lowest priority - should remain) - { - ObjectMeta: metav1.ObjectMeta{ - Name: "commitment-abc123-0", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, - }, - Spec: v1alpha1.ReservationSpec{ - TargetHost: "host-1", - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: *resource.NewQuantity(8*1024*1024*1024, resource.BinarySI), - }, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "project-1", - ResourceGroup: "test-group", - Creator: "syncer", - Allocations: map[string]v1alpha1.CommittedResourceAllocation{ - "vm-allocated": {}, - }, - }, - }, - }, - // Reservation 1: Has TargetHost + no allocations (medium priority - should remain) - { - ObjectMeta: metav1.ObjectMeta{ - Name: "commitment-abc123-1", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, - }, - 
Spec: v1alpha1.ReservationSpec{ - TargetHost: "host-2", - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: *resource.NewQuantity(8*1024*1024*1024, resource.BinarySI), - }, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "project-1", - ResourceGroup: "test-group", - Creator: "syncer", - Allocations: map[string]v1alpha1.CommittedResourceAllocation{}, - }, - }, - }, - // Reservation 2: No TargetHost + no allocations (highest priority - should be deleted) - { - ObjectMeta: metav1.ObjectMeta{ - Name: "commitment-abc123-2", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, - }, - Spec: v1alpha1.ReservationSpec{ - TargetHost: "", - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: *resource.NewQuantity(8*1024*1024*1024, resource.BinarySI), - }, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "project-1", - ResourceGroup: "test-group", - Creator: "syncer", - Allocations: map[string]v1alpha1.CommittedResourceAllocation{}, - }, - }, - }, - // Reservation 3: No TargetHost + no allocations (highest priority - should be deleted) - { - ObjectMeta: metav1.ObjectMeta{ - Name: "commitment-abc123-3", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, - }, - Spec: v1alpha1.ReservationSpec{ - TargetHost: "", - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: *resource.NewQuantity(8*1024*1024*1024, resource.BinarySI), - }, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "project-1", - ResourceGroup: "test-group", - Creator: "syncer", - Allocations: map[string]v1alpha1.CommittedResourceAllocation{}, - }, - }, - }, + name: "deletion priority: unscheduled removed first across mixed set", + existingSlots: []v1alpha1.Reservation{ + newTestCRSlot("commitment-abc123-0", 8, 
"host-1", "test-group", map[string]v1alpha1.CommittedResourceAllocation{"vm-allocated": {}}), + newTestCRSlot("commitment-abc123-1", 8, "host-2", "test-group", nil), + newTestCRSlot("commitment-abc123-2", 8, "", "test-group", nil), + newTestCRSlot("commitment-abc123-3", 8, "", "test-group", nil), }, - desiredMemoryBytes: 16 * 1024 * 1024 * 1024, // Need to delete 2 out of 4 - expectedRemovedCount: 2, + desiredMemoryGiB: 16, + wantRemovedCount: 2, validateRemoved: func(t *testing.T, removed []v1alpha1.Reservation) { - // Both removed should have no TargetHost (highest priority for deletion) - for _, res := range removed { - if res.Spec.TargetHost != "" { - t.Errorf("expected unscheduled reservations to be removed first, but removed %s with TargetHost %s", - res.Name, res.Spec.TargetHost) + for _, r := range removed { + if r.Spec.TargetHost != "" { + t.Errorf("expected unscheduled reservations removed first, got TargetHost=%q on %s", r.Spec.TargetHost, r.Name) } } }, validateRemaining: func(t *testing.T, remaining []v1alpha1.Reservation) { if len(remaining) != 2 { - t.Fatalf("expected 2 remaining reservations, got %d", len(remaining)) - } - // Both remaining should have TargetHost - for _, res := range remaining { - if res.Spec.TargetHost == "" { - t.Errorf("expected scheduled reservations to remain, but %s has no TargetHost", res.Name) - } + t.Fatalf("expected 2 remaining, got %d", len(remaining)) } - // At least one should have allocations (the one with lowest deletion priority) - hasAllocations := false - for _, res := range remaining { - if len(res.Spec.CommittedResourceReservation.Allocations) > 0 { - hasAllocations = true - break + for _, r := range remaining { + if r.Spec.TargetHost == "" { + t.Errorf("expected scheduled reservations to remain, got empty TargetHost on %s", r.Name) } } - if !hasAllocations { - t.Error("expected at least one remaining reservation to have allocations") - } }, }, } + scheme := newCRTestScheme(t) + for _, tt := range tests { 
t.Run(tt.name, func(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatal(err) + objects := make([]client.Object, len(tt.existingSlots)) + for i := range tt.existingSlots { + objects[i] = &tt.existingSlots[i] } + k8sClient := newCRTestClient(scheme, objects...) + manager := NewReservationManager(k8sClient) - // Convert slice to individual objects for WithObjects - objects := make([]client.Object, len(tt.existingReservations)) - for i := range tt.existingReservations { - objects[i] = &tt.existingReservations[i] + flavorGroups := testFlavorGroups() + if tt.flavorGroupOverride != nil { + flavorGroups = tt.flavorGroupOverride } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). - Build() - - manager := NewReservationManager(client) - flavorGroup := testFlavorGroup() - flavorGroups := map[string]compute.FlavorGroupFeature{ - "test-group": flavorGroup, - } - desiredState := &CommitmentState{ CommitmentUUID: "abc123", ProjectID: "project-1", FlavorGroupName: "test-group", - TotalMemoryBytes: tt.desiredMemoryBytes, + TotalMemoryBytes: tt.desiredMemoryGiB * 1024 * 1024 * 1024, } applyResult, err := manager.ApplyCommitmentState( - context.Background(), - logr.Discard(), - desiredState, - flavorGroups, - "syncer", + context.Background(), logr.Discard(), desiredState, flavorGroups, "syncer", ) + if tt.wantError { + if err == nil { + t.Fatal("expected error, got nil") + } + return + } if err != nil { t.Fatalf("unexpected error: %v", err) } - if len(applyResult.RemovedReservations) != tt.expectedRemovedCount { - t.Fatalf("expected %d removed reservations, got %d", tt.expectedRemovedCount, len(applyResult.RemovedReservations)) + switch { + case tt.wantRemovedCount > 0: + if len(applyResult.RemovedReservations) != tt.wantRemovedCount { + t.Fatalf("expected %d removed, got %d", tt.wantRemovedCount, len(applyResult.RemovedReservations)) + } + case tt.wantRemovedCount == 0: + if 
len(applyResult.RemovedReservations) != 0 { + t.Errorf("expected 0 removed, got %d", len(applyResult.RemovedReservations)) + } + case tt.wantRemovedCount == -1: + if len(applyResult.RemovedReservations) == 0 { + t.Fatal("expected at least one removed reservation") + } } if tt.validateRemoved != nil { tt.validateRemoved(t, applyResult.RemovedReservations) } - - // Get remaining reservations - var remainingList v1alpha1.ReservationList - if err := client.List(context.Background(), &remainingList); err != nil { - t.Fatal(err) + if tt.validateTouched != nil { + tt.validateTouched(t, applyResult.TouchedReservations) } - if tt.validateRemaining != nil { - tt.validateRemaining(t, remainingList.Items) + var remaining v1alpha1.ReservationList + if err := k8sClient.List(context.Background(), &remaining); err != nil { + t.Fatal(err) + } + tt.validateRemaining(t, remaining.Items) } }) } } -func TestApplyCommitmentState_HandlesZeroCapacity(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatal(err) - } - - // Create existing reservation - existingReservation := v1alpha1.Reservation{ - ObjectMeta: metav1.ObjectMeta{ - Name: "commitment-abc123-0", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, - }, - Spec: v1alpha1.ReservationSpec{ - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: *resource.NewQuantity(8*1024*1024*1024, resource.BinarySI), - }, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "project-1", - ResourceGroup: "test-group", - Creator: "syncer", - Allocations: map[string]v1alpha1.CommittedResourceAllocation{}, - }, - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(&existingReservation). 
- Build() - - manager := NewReservationManager(client) - flavorGroup := testFlavorGroup() - flavorGroups := map[string]compute.FlavorGroupFeature{ - "test-group": flavorGroup, - } - - // Desired state: zero capacity (commitment expired or canceled) - desiredState := &CommitmentState{ - CommitmentUUID: "abc123", - ProjectID: "project-1", - FlavorGroupName: "test-group", - TotalMemoryBytes: 0, - } - - applyResult, err := manager.ApplyCommitmentState( - context.Background(), - logr.Discard(), - desiredState, - flavorGroups, - "syncer", - ) - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if len(applyResult.TouchedReservations) != 0 { - t.Errorf("expected 0 new reservations, got %d", len(applyResult.TouchedReservations)) - } - - // Should remove all reservations - if len(applyResult.RemovedReservations) != 1 { - t.Fatalf("expected 1 removed reservation, got %d", len(applyResult.RemovedReservations)) - } - - // Verify no reservations remain - var remainingList v1alpha1.ReservationList - if err := client.List(context.Background(), &remainingList); err != nil { - t.Fatal(err) - } - - if len(remainingList.Items) != 0 { - t.Errorf("expected 0 remaining reservations, got %d", len(remainingList.Items)) - } -} - -func TestApplyCommitmentState_FixesWrongFlavorGroup(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatal(err) - } - - // Create reservation with wrong flavor group - existingReservation := v1alpha1.Reservation{ - ObjectMeta: metav1.ObjectMeta{ - Name: "commitment-abc123-0", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, - }, - Spec: v1alpha1.ReservationSpec{ - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: *resource.NewQuantity(8*1024*1024*1024, resource.BinarySI), - }, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "project-1", - ResourceGroup: 
"wrong-group", // Wrong flavor group - Creator: "syncer", - Allocations: map[string]v1alpha1.CommittedResourceAllocation{}, - }, - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(&existingReservation). - Build() - - manager := NewReservationManager(client) - flavorGroup := testFlavorGroup() - flavorGroups := map[string]compute.FlavorGroupFeature{ - "test-group": flavorGroup, - } - - // Desired state with correct flavor group - desiredState := &CommitmentState{ - CommitmentUUID: "abc123", - ProjectID: "project-1", - FlavorGroupName: "test-group", - TotalMemoryBytes: 8 * 1024 * 1024 * 1024, - } - - applyResult, err := manager.ApplyCommitmentState( - context.Background(), - logr.Discard(), - desiredState, - flavorGroups, - "syncer", - ) - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - // Should remove wrong reservation and create new one - if len(applyResult.RemovedReservations) != 1 { - t.Fatalf("expected 1 removed reservation, got %d", len(applyResult.RemovedReservations)) - } - - if len(applyResult.TouchedReservations) != 1 { - t.Fatalf("expected 1 new reservation, got %d", len(applyResult.TouchedReservations)) - } - - // Verify new reservation has correct flavor group - if applyResult.TouchedReservations[0].Spec.CommittedResourceReservation.ResourceGroup != "test-group" { - t.Errorf("expected flavor group test-group, got %s", - applyResult.TouchedReservations[0].Spec.CommittedResourceReservation.ResourceGroup) - } -} - -func TestApplyCommitmentState_UnknownFlavorGroup(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatal(err) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). 
- Build() - - manager := NewReservationManager(client) - flavorGroups := map[string]compute.FlavorGroupFeature{} // Empty - - desiredState := &CommitmentState{ - CommitmentUUID: "abc123", - ProjectID: "project-1", - FlavorGroupName: "unknown-group", - TotalMemoryBytes: 8 * 1024 * 1024 * 1024, - } - - _, err := manager.ApplyCommitmentState( - context.Background(), - logr.Discard(), - desiredState, - flavorGroups, - "syncer", - ) - - if err == nil { - t.Fatal("expected error for unknown flavor group, got nil") - } -} +// ============================================================================ +// Tests: newReservation flavor selection +// ============================================================================ func TestNewReservation_SelectsAppropriateFlavor(t *testing.T) { manager := &ReservationManager{} @@ -729,8 +314,8 @@ func TestNewReservation_SelectsAppropriateFlavor(t *testing.T) { }, { name: "oversized uses largest available flavor", - deltaMemory: 100 * 1024 * 1024 * 1024, // 100 GiB (larger than any flavor) - expectedName: "large", // Will use largest available + deltaMemory: 100 * 1024 * 1024 * 1024, // 100 GiB + expectedName: "large", expectedCores: 16, }, } @@ -744,26 +329,15 @@ func TestNewReservation_SelectsAppropriateFlavor(t *testing.T) { TotalMemoryBytes: tt.deltaMemory, } - reservation := manager.newReservation( - state, - 0, - tt.deltaMemory, - flavorGroup, - "syncer", - ) + reservation := manager.newReservation(state, 0, tt.deltaMemory, flavorGroup, "syncer") - // Verify flavor selection if reservation.Spec.CommittedResourceReservation.ResourceName != tt.expectedName { t.Errorf("expected flavor %s, got %s", - tt.expectedName, - reservation.Spec.CommittedResourceReservation.ResourceName) + tt.expectedName, reservation.Spec.CommittedResourceReservation.ResourceName) } - - // Verify CPU allocation cpuQuantity := reservation.Spec.Resources[hv1.ResourceCPU] if cpuQuantity.Value() != tt.expectedCores { - t.Errorf("expected %d cores, got %d", - 
tt.expectedCores, cpuQuantity.Value()) + t.Errorf("expected %d cores, got %d", tt.expectedCores, cpuQuantity.Value()) } }) } diff --git a/internal/scheduling/reservations/commitments/state.go b/internal/scheduling/reservations/commitments/state.go index 698aea428..96ede88ac 100644 --- a/internal/scheduling/reservations/commitments/state.go +++ b/internal/scheduling/reservations/commitments/state.go @@ -93,6 +93,10 @@ type CommitmentState struct { EndTime *time.Time // CreatorRequestID is the request ID that triggered this state change (for traceability) CreatorRequestID string + // NamePrefix overrides the default "commitment-<uuid>-<index>" reservation naming convention. + // When set (e.g. "<name>-"), Reservation CRDs are named "<prefix><index>". + // Used by the CommittedResource controller; leave empty for the legacy syncer path. + NamePrefix string } // FromCommitment converts Limes commitment to CommitmentState. diff --git a/internal/scheduling/reservations/commitments/syncer_test.go b/internal/scheduling/reservations/commitments/syncer_test.go index e4bf6e841..e30f286c7 100644 --- a/internal/scheduling/reservations/commitments/syncer_test.go +++ b/internal/scheduling/reservations/commitments/syncer_test.go @@ -344,10 +344,11 @@ func TestSyncer_SyncReservations_UpdateExisting(t *testing.T) { Spec: v1alpha1.ReservationSpec{ Type: v1alpha1.ReservationTypeCommittedResource, CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "old-project", - ResourceName: "old-flavor", - ResourceGroup: "old_group", - Creator: CreatorValue, + CommitmentUUID: "12345-67890-abcdef", + ProjectID: "old-project", + ResourceName: "old-flavor", + ResourceGroup: "old_group", + Creator: CreatorValue, }, Resources: map[hv1.ResourceName]resource.Quantity{ hv1.ResourceMemory: resource.MustParse("512Mi"),