Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions api/v1alpha1/committed_resource_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ const (
// CommittedResourceConditionReady indicates whether the CommittedResource has been
// successfully reconciled into active Reservation CRDs.
CommittedResourceConditionReady = "Ready"

// Condition reasons set by the CommittedResource controller.
CommittedResourceReasonAccepted = "Accepted"
CommittedResourceReasonPlanned = "Planned"
CommittedResourceReasonReserving = "Reserving"
CommittedResourceReasonRejected = "Rejected"
)

// +kubebuilder:object:root=true
Expand Down
15 changes: 15 additions & 0 deletions api/v1alpha1/reservation_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,15 @@ type CommittedResourceReservationSpec struct {
// +kubebuilder:validation:Optional
Creator string `json:"creator,omitempty"`

// ParentGeneration is the Generation of the CommittedResource CRD at the time this
// reservation was last written by the CommittedResource controller. The Reservation
// controller echoes it to Status.CommittedResourceReservation.ObservedParentGeneration
// once it has processed the reservation, allowing the CR controller to wait until
// all child reservations are up-to-date before accepting.
// Zero means the field is not set (syncer-created reservations, no parent CR).
// +kubebuilder:validation:Optional
ParentGeneration int64 `json:"parentGeneration,omitempty"`

// Allocations maps workload identifiers to their allocation details.
// Key: Workload UUID (VM UUID for Nova, Pod UID for Pods, Machine UID for IronCore, etc.)
// Value: allocation state and metadata
Expand Down Expand Up @@ -148,6 +157,12 @@ const (

// CommittedResourceReservationStatus defines the status fields specific to committed resource reservations.
type CommittedResourceReservationStatus struct {
// ObservedParentGeneration is the Spec.CommittedResourceReservation.ParentGeneration value
// that this Reservation controller last processed. When it matches ParentGeneration in spec,
// the CR controller knows this reservation is up-to-date for the current CR spec version.
// +kubebuilder:validation:Optional
ObservedParentGeneration int64 `json:"observedParentGeneration,omitempty"`

// Allocations maps VM/instance UUIDs to the host they are currently running on.
// Key: VM/instance UUID, Value: Host name where the VM is currently running.
// +kubebuilder:validation:Optional
Expand Down
12 changes: 5 additions & 7 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ func main() {
if commitmentsConfig.DatasourceName != "" {
commitmentsUsageDB = commitments.NewDBUsageClient(multiclusterClient, commitmentsConfig.DatasourceName)
}
commitmentsAPI := commitmentsapi.NewAPIWithConfig(multiclusterClient, commitmentsConfig, commitmentsUsageDB)
commitmentsAPI := commitmentsapi.NewAPIWithConfig(multiclusterClient, commitmentsConfig.API, commitmentsUsageDB)
commitmentsAPI.Init(mux, metrics.Registry, ctrl.Log.WithName("commitments-api"))

if slices.Contains(mainConfig.EnabledControllers, "nova-pipeline-controllers") {
Expand Down Expand Up @@ -538,12 +538,11 @@ func main() {
monitor := reservations.NewMonitor(multiclusterClient)
metrics.Registry.MustRegister(&monitor)
commitmentsConfig := conf.GetConfigOrDie[commitments.Config]()
commitmentsConfig.ApplyDefaults()

if err := (&commitments.CommitmentReservationController{
Client: multiclusterClient,
Scheme: mgr.GetScheme(),
Conf: commitmentsConfig,
Conf: commitmentsConfig.ReservationController,
}).SetupWithManager(mgr, multiclusterClient); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "CommitmentReservation")
os.Exit(1)
Expand All @@ -552,7 +551,7 @@ func main() {
if err := (&commitments.CommittedResourceController{
Client: multiclusterClient,
Scheme: mgr.GetScheme(),
Conf: commitmentsConfig,
Conf: commitmentsConfig.CommittedResourceController,
}).SetupWithManager(mgr, multiclusterClient); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "CommittedResource")
os.Exit(1)
Expand Down Expand Up @@ -716,13 +715,12 @@ func main() {
os.Exit(1)
}

syncerMonitor := commitments.NewSyncerMonitor()
must.Succeed(metrics.Registry.Register(syncerMonitor))
if slices.Contains(mainConfig.EnabledTasks, "commitments-sync-task") {
setupLog.Info("starting commitments syncer")
syncerMonitor := commitments.NewSyncerMonitor()
must.Succeed(metrics.Registry.Register(syncerMonitor))
syncer := commitments.NewSyncer(multiclusterClient, syncerMonitor)
syncerConfig := conf.GetConfigOrDie[commitments.SyncerConfig]()
syncerConfig.ApplyDefaults()
if err := (&task.Runner{
Client: multiclusterClient,
Interval: syncerConfig.SyncInterval,
Expand Down
39 changes: 30 additions & 9 deletions docs/reservations/committed-resource-reservations.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ flowchart LR
UsageAPI[Usage API]
Scheduler[Scheduler API]

ChangeAPI -->|CRUD| CR
Syncer -->|CRUD| CR
ChangeAPI -->|upsert + poll status| CR
Syncer -->|upsert| CR
UsageAPI -->|read| CR
UsageAPI -->|read| Res
CapacityAPI -->|read| Res
Expand Down Expand Up @@ -117,12 +117,31 @@ The controller's job is to keep child `Reservation` CRDs in sync with the desire

- **`pending`**: Cortex is being asked for a yes/no decision. If placement fails for any reason, child Reservations are removed and the CR is marked Rejected. The caller (e.g. the change-commitments API) reads the outcome and reports back to Limes. No retry.

- **`guaranteed` / `confirmed`**: Cortex is expected to honour the commitment. The default is to keep retrying until placement succeeds (`Ready=False, Reason=Reserving`). Callers that can accept "no" as an answer (e.g. the change-commitments API on a resize request) set `Spec.AllowRejection=true`; the controller then rejects on failure instead of retrying.
- **`guaranteed` / `confirmed`**: Cortex is expected to honour the commitment. The default is to keep retrying until placement succeeds (`Ready=False, Reason=Reserving`). Callers that can accept "no" as an answer set `Spec.AllowRejection=true` (the change-commitments API sets this for confirming requests — new commitments, resizes); the controller then rejects on failure instead of retrying.

- **On rejection**: rolls back child Reservations to the last successfully placed quantity (`Status.AcceptedAmount`). For a CR that was never accepted, this means removing all child Reservations.

The controller communicates with the Reservation controller only through CRDs — no direct calls.

**Reconcile trigger flow:**

```mermaid
sequenceDiagram
participant API as Change-Commitments API
participant CRCtrl as CR Controller
participant CRCRD as CommittedResource CRD
participant ResCRD as Reservation CRD
participant ResCtrl as Reservation Controller

API->>CRCRD: write (create/update)
CRCRD-->>CRCtrl: watch fires
CRCtrl->>ResCRD: create/update child slots
ResCRD-->>ResCtrl: watch fires
ResCtrl->>ResCRD: update (ObservedParentGeneration, Ready=True/False)
ResCRD-->>CRCtrl: watch fires (Reservation→parent CR lookup)
CRCtrl->>CRCRD: update status (Accepted / Reserving / Rejected)
```

### Reservation Lifecycle

| Component | Event | Timing | Action |
Expand Down Expand Up @@ -228,18 +247,20 @@ The `Reservation` controller (`CommitmentReservationController`) watches `Reserv

### Change-Commitments API

The change-commitments API receives batched commitment changes from Limes and manages reservations accordingly.
The change-commitments API receives batched commitment changes from Limes and applies them using a **write-intent, watch-for-outcome** pattern: the handler creates or updates `CommittedResource` CRDs and polls their `Status.Conditions` until each reaches a terminal state — it does not interact with `Reservation` CRDs directly.

**Request Semantics**: A request can contain multiple commitment changes across different projects and flavor groups. The semantics are **all-or-nothing** — if any commitment in the batch cannot be fulfilled (e.g., insufficient capacity), the entire request is rejected and rolled back.

**Operations**: Cortex performs CRUD operations on local Reservation CRDs to match the new desired state:
- Creates new reservations for increased commitment amounts
- Deletes existing reservations for decreased commitments
- Preserves existing reservations that already have VMs allocated when possible
**Operations**:
1. For each commitment in the batch, create or update a `CommittedResource` CRD. `Spec.AllowRejection` mirrors the request's `RequiresConfirmation` flag: `true` for changes where Limes needs a yes/no answer (new commitments, resizes), `false` for non-confirming changes (deletions, status-only transitions) where Limes doesn't act on the rejection reason
2. For confirming changes, poll `CommittedResource.Status.Conditions[Ready]` until each reaches a terminal state: `Reason=Accepted` (success), `Reason=Planned` (deferred acceptance), or `Reason=Rejected` (failure). Non-confirming changes return immediately without polling
3. On any failure or timeout, restore all modified `CommittedResource` CRDs to their pre-request specs (or delete newly-created ones)

The `CommittedResource` controller handles all downstream `Reservation` CRUD. `AllowRejection=true` tells it to reject and roll back child Reservations on placement failure rather than retrying indefinitely.

### Syncer Task

The syncer task runs periodically and syncs local Reservation CRD state to match Limes' view of commitments, correcting drift from missed API calls or restarts.
The syncer task runs periodically and syncs local `CommittedResource` CRD state to match Limes' view of commitments, correcting drift from missed API calls or restarts. It writes `CommittedResource` CRDs only — Reservation CRUD is the controller's responsibility.

### Usage API

Expand Down
105 changes: 15 additions & 90 deletions helm/bundles/cortex-nova/alerts/nova.alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -505,52 +505,10 @@ groups:
CRD retrieval. Limes scrapes may time out, affecting capacity reporting.

# Committed Resource Syncer Alerts
- alert: CortexNovaCommittedResourceSyncerErrorsHigh
expr: increase(cortex_committed_resource_syncer_errors_total{service="cortex-nova-metrics"}[1h]) > 3
for: 5m
labels:
context: committed-resource-syncer
dashboard: cortex-status-dashboard/cortex-status-dashboard
service: cortex
severity: warning
support_group: workload-management
annotations:
summary: "Committed Resource syncer experiencing errors"
description: >
The committed resource syncer has encountered multiple errors in the last hour.
This may indicate connectivity issues with Limes, malformed API responses,
or failures writing reservation CRDs. Check the syncer logs for error details.

- alert: CortexNovaCommittedResourceSyncerUnitMismatchRateHigh
expr: |
(
sum(rate(cortex_committed_resource_syncer_commitments_skipped_total{service="cortex-nova-metrics", reason="unit_mismatch"}[1h]))
/ sum(rate(cortex_committed_resource_syncer_commitments_total{service="cortex-nova-metrics"}[1h]))
) > 0.05
and on() sum(rate(cortex_committed_resource_syncer_commitments_total{service="cortex-nova-metrics"}[1h])) > 0
for: 15m
labels:
context: committed-resource-syncer
dashboard: cortex-status-dashboard/cortex-status-dashboard
service: cortex
severity: warning
support_group: workload-management
annotations:
summary: "Committed Resource syncer unit mismatch rate >5%"
description: >
More than 5% of commitments are being skipped due to unit mismatches between
Limes and Cortex flavor groups. This happens when Limes has not yet been
updated to use the new unit format after a flavor group change. The affected
commitments will keep their existing reservations until Limes notices the update.
Check the logs if this error persists for longer time.

- alert: CortexNovaCommittedResourceSyncerUnknownFlavorGroupRateHigh
expr: |
(
sum(rate(cortex_committed_resource_syncer_commitments_skipped_total{service="cortex-nova-metrics", reason="unknown_flavor_group"}[1h]))
/ sum(rate(cortex_committed_resource_syncer_commitments_total{service="cortex-nova-metrics"}[1h]))
) > 0
and on() sum(rate(cortex_committed_resource_syncer_commitments_total{service="cortex-nova-metrics"}[1h])) > 0
# These alerts only fire when the syncer is enabled (metrics are only registered when enabled).
# Absent metrics = syncer disabled = alerts inactive by design.
- alert: CortexNovaCommittedResourceSyncerNotRunning
expr: increase(cortex_committed_resource_syncer_duration_seconds_count{service="cortex-nova-metrics"}[3h]) < 1
for: 15m
labels:
context: committed-resource-syncer
Expand All @@ -559,60 +517,27 @@ groups:
severity: warning
support_group: workload-management
annotations:
summary: "Committed Resource syncer unknown flavor group rate >0%"
summary: "Committed Resource syncer has not run in 3 hours"
description: >
Some commitments reference flavor groups that don't exist in
Cortex Knowledge (anymore). This may indicate that flavor group configuration is
out of sync between Limes and Cortex, or that Knowledge extraction is failing.
Check the flavor group Knowledge CRD and history to see what was changed.
No commitment sync has completed in the last 3 hours. The syncer runs hourly,
so roughly three runs should appear in this window; zero completed runs means
the task is stalled or silently broken. Check that the syncer task is healthy
and Limes is reachable.

- alert: CortexNovaCommittedResourceSyncerLocalChangeRateHigh
expr: |
(
(
rate(cortex_committed_resource_syncer_reservations_created_total{service="cortex-nova-metrics"}[1h]) +
rate(cortex_committed_resource_syncer_reservations_deleted_total{service="cortex-nova-metrics"}[1h]) +
rate(cortex_committed_resource_syncer_reservations_repaired_total{service="cortex-nova-metrics"}[1h])
) / rate(cortex_committed_resource_syncer_commitments_processed_total{service="cortex-nova-metrics"}[1h])
) > 0.01
and on() rate(cortex_committed_resource_syncer_commitments_processed_total{service="cortex-nova-metrics"}[1h]) > 0
for: 15m
labels:
context: committed-resource-syncer
dashboard: cortex-status-dashboard/cortex-status-dashboard
service: cortex
severity: warning
support_group: workload-management
annotations:
summary: "Committed Resource syncer local change rate >1%"
description: >
More than 1% of synced commitments are requiring reservation changes
(creates, deletes, or repairs). This is higher than expected for steady-state
operation and may indicate data inconsistencies, external modifications to
reservations, or issues with the CRDs. Check Cortex logs for details.

- alert: CortexNovaCommittedResourceSyncerRepairRateHigh
expr: |
(
rate(cortex_committed_resource_syncer_reservations_repaired_total{service="cortex-nova-metrics"}[1h])
/ rate(cortex_committed_resource_syncer_commitments_processed_total{service="cortex-nova-metrics"}[1h])
) > 0
and on() rate(cortex_committed_resource_syncer_commitments_processed_total{service="cortex-nova-metrics"}[1h]) > 0
for: 15m
- alert: CortexNovaCommittedResourceSyncerErrors
expr: increase(cortex_committed_resource_syncer_errors_total{service="cortex-nova-metrics"}[1h]) > 3
for: 5m
labels:
context: committed-resource-syncer
dashboard: cortex-status-dashboard/cortex-status-dashboard
service: cortex
severity: warning
support_group: workload-management
annotations:
summary: "Committed Resource syncer repair rate >0%"
summary: "Committed Resource syncer is repeatedly failing"
description: >
Some commitments have reservations that needed repair
(wrong metadata like project ID or flavor group). This may indicate data
corruption, bugs in reservation creation, or external modifications.
Reservations are automatically repaired, but the root cause should be
investigated if this alert persists.
The committed resource syncer has encountered more than 3 errors in the last
hour. Check syncer logs for details; common causes are connectivity issues
with Limes or failures writing CommittedResource CRDs.

- alert: CortexNovaDoesntFindValidKVMHosts
expr: sum by (az, hvtype) (cortex_vm_faults{hvtype=~"CH|QEMU",faultmsg=~".*No valid host was found.*"}) > 0
Expand Down
Loading
Loading