cobaltcore-dev · PhilippMatthes · May 4, 2026 · May 4, 2026 · May 4, 2026 · May 4, 2026
@@ -1,5 +1,24 @@
 # Changelog
 
+## 2026-05-04 — [#793](https://github.com/cobaltcore-dev/cortex/pull/793)
+
+### cortex v0.0.46 (sha-ab6eb45d)
+
+Non-breaking changes:
+- Fix capacity filter to correctly account for multi-VM CommittedResource reservation slots — confirmed VMs are now summed (not just the last one), blocks are clamped to zero when the confirmed total exceeds the slot size, and spec-only VMs larger than the remaining slot are fully covered
+- Expose `prometheusDatasourceControllerParallelReconciles` config option to allow parallel reconciles in the Prometheus datasource controller, reducing initial sync latency
+- Remove `Conf` field from PrometheusDatasourceReconciler — config is now loaded internally via `conf.GetConfig` during `SetupWithManager`
+- Add operator-controlled per-resource-type config (`flavorGroupResourceConfig`) for committed resources, replacing runtime derivation from flavor group metadata; supports wildcard (`*`) catch-all for unknown groups
+- Propagate `AnnotationCreatorRequestID` from the change-commitments API to the CommittedResource CRD and through the reservation controller for end-to-end request tracing
+
+### cortex-nova v0.0.59 (sha-ab6eb45d)
+
+Includes updated chart cortex v0.0.46.
+
+Non-breaking changes:
+- Remove all committed-resource-related Prometheus alerts (info API, change API, usage API, capacity API, and syncer alerts)
+- Add `flavorGroupResourceConfig` to cortex-nova values.yaml with a wildcard default that sets `hasCapacity: true` for ram, cores, and instances
+
 ## 2026-05-04 — [#779](https://github.com/cobaltcore-dev/cortex/pull/779)
 
 ### cortex v0.0.45 (sha-1fb35660)

@@ -573,7 +573,6 @@ func main() {
 			Client:  multiclusterClient,
 			Scheme:  mgr.GetScheme(),
 			Monitor: monitor,
-			Conf:    conf.GetConfigOrDie[prometheus.PrometheusDatasourceReconcilerConfig](),
 		}).SetupWithManager(mgr, multiclusterClient); err != nil {
 			setupLog.Error(err, "unable to create controller", "controller", "PrometheusDatasourceReconciler")
 			os.Exit(1)

@@ -5,7 +5,7 @@ apiVersion: v2
 name: cortex-cinder
 description: A Helm chart deploying Cortex for Cinder.
 type: application
-version: 0.0.58
+version: 0.0.59
 appVersion: 0.1.0
 dependencies:
   # from: file://../../library/cortex-postgres
@@ -16,12 +16,12 @@ dependencies:
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.45
+    version: 0.0.46
     alias: cortex-knowledge-controllers
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.45
+    version: 0.0.46
     alias: cortex-scheduling-controllers
 
   # Owner info adds a configmap to the kubernetes cluster with information on

@@ -5,13 +5,13 @@ apiVersion: v2
 name: cortex-crds
 description: A Helm chart deploying Cortex CRDs.
 type: application
-version: 0.0.58
+version: 0.0.59
 appVersion: 0.1.0
 dependencies:
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.45
+    version: 0.0.46
 
   # Owner info adds a configmap to the kubernetes cluster with information on
   # the service owner. This makes it easier to find out who to contact in case

@@ -5,13 +5,13 @@ apiVersion: v2
 name: cortex-ironcore
 description: A Helm chart deploying Cortex for IronCore.
 type: application
-version: 0.0.58
+version: 0.0.59
 appVersion: 0.1.0
 dependencies:
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.45
+    version: 0.0.46
 
   # Owner info adds a configmap to the kubernetes cluster with information on
   # the service owner. This makes it easier to find out who to contact in case

@@ -5,7 +5,7 @@ apiVersion: v2
 name: cortex-manila
 description: A Helm chart deploying Cortex for Manila.
 type: application
-version: 0.0.58
+version: 0.0.59
 appVersion: 0.1.0
 dependencies:
   # from: file://../../library/cortex-postgres
@@ -16,12 +16,12 @@ dependencies:
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.45
+    version: 0.0.46
     alias: cortex-knowledge-controllers
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.45
+    version: 0.0.46
     alias: cortex-scheduling-controllers
 
   # Owner info adds a configmap to the kubernetes cluster with information on

@@ -5,7 +5,7 @@ apiVersion: v2
 name: cortex-nova
 description: A Helm chart deploying Cortex for Nova.
 type: application
-version: 0.0.58
+version: 0.0.59
 appVersion: 0.1.0
 dependencies:
   # from: file://../../library/cortex-postgres
@@ -16,12 +16,12 @@ dependencies:
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.45
+    version: 0.0.46
     alias: cortex-knowledge-controllers
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.45
+    version: 0.0.46
     alias: cortex-scheduling-controllers
 
   # Owner info adds a configmap to the kubernetes cluster with information on

@@ -287,258 +287,6 @@ groups:
         configuration. It is recommended to investigate the
         pipeline status and logs for more details.
 
-  # Committed Resource Info API Alerts
-  - alert: CortexNovaCommittedResourceInfoHttpRequest500sTooHigh
-    expr: rate(cortex_committed_resource_info_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource info API HTTP 500 errors too high"
-      description: >
-        The committed resource info API (Limes LIQUID integration) is responding
-        with HTTP 5xx errors. This indicates internal problems building service info,
-        such as invalid flavor group data. Limes will not be able to discover available
-        resources until the issue is resolved.
-
-  # Committed Resource Change API Alerts
-  - alert: CortexNovaCommittedResourceHttpRequest400sTooHigh
-    expr: rate(cortex_committed_resource_change_api_requests_total{service="cortex-nova-metrics", status_code=~"4.."}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource change API HTTP 400 errors too high"
-      description: >
-        The committed resource change API (Limes LIQUID integration) is responding
-        with HTTP 4xx errors. This may happen when Limes sends a request with
-        an outdated info version (409), the API is temporarily unavailable,
-        or the request format is invalid. Limes will typically retry these
-        requests, so no immediate action is needed unless the errors persist.
-
-  - alert: CortexNovaCommittedResourceHttpRequest500sTooHigh
-    expr: rate(cortex_committed_resource_change_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource change API HTTP 500 errors too high"
-      description: >
-        The committed resource change API (Limes LIQUID integration) is responding
-        with HTTP 5xx errors. This is not expected and indicates that Cortex
-        is having an internal problem processing commitment changes. Limes will
-        continue to retry, but new commitments may not be fulfilled until the
-        issue is resolved.
-
-  - alert: CortexNovaCommittedResourceLatencyTooHigh
-    expr: |
-      histogram_quantile(0.95, sum(rate(cortex_committed_resource_change_api_request_duration_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (le)) > 30
-      and on() rate(cortex_committed_resource_change_api_requests_total{service="cortex-nova-metrics"}[5m]) > 0
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource change API latency too high"
-      description: >
-        The committed resource change API (Limes LIQUID integration) is experiencing
-        high latency (p95 > 30s). This may indicate that the scheduling pipeline
-        is under heavy load or that reservation scheduling is taking longer than
-        expected. Limes requests may time out, causing commitment changes to fail.
-
-  - alert: CortexNovaCommittedResourceRejectionRateTooHigh
-    expr: |
-      (
-        sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics", result="rejected"}[5m]))
-        / sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics"}[5m]))
-      ) > 0.5
-      and on() sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics"}[5m])) > 0
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource rejection rate too high"
-      description: >
-        More than 50% of commitment change requests are being rejected.
-        This may indicate insufficient capacity in the datacenter to fulfill
-        new commitments, or issues with the commitment scheduling logic.
-        Rejected commitments are rolled back, so Limes will see them as failed
-        and may retry or report the failure to users.
-
-  - alert: CortexNovaCommittedResourceTimeoutsTooHigh
-    expr: increase(cortex_committed_resource_change_api_timeouts_total{service="cortex-nova-metrics"}[5m]) > 0
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource change API timeout detected"
-      description: >
-        The committed resource change API (Limes LIQUID integration) timed out
-        while waiting for reservations to become ready. This indicates that the
-        scheduling pipeline is overloaded or reservations are taking too long
-        to be scheduled. Affected commitment changes are rolled back and Limes
-        will see them as failed. Consider investigating the scheduler performance
-        or increasing the timeout configuration.
-
-  # Committed Resource Usage API Alerts
-  - alert: CortexNovaCommittedResourceUsageHttpRequest400sTooHigh
-    expr: rate(cortex_committed_resource_usage_api_requests_total{service="cortex-nova-metrics", status_code=~"4.."}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource usage API HTTP 400 errors too high"
-      description: >
-        The committed resource usage API (Limes LIQUID integration) is responding
-        with HTTP 4xx errors. This may indicate invalid project IDs or malformed
-        requests from Limes. Limes will typically retry these requests.
-
-  - alert: CortexNovaCommittedResourceUsageHttpRequest500sTooHigh
-    expr: rate(cortex_committed_resource_usage_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource usage API HTTP 500 errors too high"
-      description: >
-        The committed resource usage API (Limes LIQUID integration) is responding
-        with HTTP 5xx errors. This indicates internal problems fetching reservations
-        or Nova server data. Limes may receive stale or incomplete usage data.
-
-  - alert: CortexNovaCommittedResourceUsageLatencyTooHigh
-    expr: |
-      histogram_quantile(0.95, sum(rate(cortex_committed_resource_usage_api_request_duration_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (le)) > 10
-      and on() rate(cortex_committed_resource_usage_api_requests_total{service="cortex-nova-metrics"}[5m]) > 0
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource usage API latency too high"
-      description: >
-        The committed resource usage API (Limes LIQUID integration) is experiencing
-        high latency (p95 > 10s). This may indicate slow Nova API responses or
-        database queries. Limes scrapes may time out, affecting quota reporting.
-
-  # Committed Resource Capacity API Alerts
-  - alert: CortexNovaCommittedResourceCapacityHttpRequest400sTooHigh
-    expr: rate(cortex_committed_resource_capacity_api_requests_total{service="cortex-nova-metrics", status_code=~"4.."}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource capacity API HTTP 400 errors too high"
-      description: >
-        The committed resource capacity API (Limes LIQUID integration) is responding
-        with HTTP 4xx errors. This may indicate malformed requests from Limes.
-
-  - alert: CortexNovaCommittedResourceCapacityHttpRequest500sTooHigh
-    expr: rate(cortex_committed_resource_capacity_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource capacity API HTTP 500 errors too high"
-      description: >
-        The committed resource capacity API (Limes LIQUID integration) is responding
-        with HTTP 5xx errors. This indicates internal problems calculating cluster
-        capacity. Limes may receive stale or incomplete capacity data.
-
-  - alert: CortexNovaCommittedResourceCapacityLatencyTooHigh
-    expr: |
-      histogram_quantile(0.95, sum(rate(cortex_committed_resource_capacity_api_request_duration_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (le)) > 10
-      and on() rate(cortex_committed_resource_capacity_api_requests_total{service="cortex-nova-metrics"}[5m]) > 0
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource capacity API latency too high"
-      description: >
-        The committed resource capacity API (Limes LIQUID integration) is experiencing
-        high latency (p95 > 10s). This may indicate slow database queries or knowledge
-        CRD retrieval. Limes scrapes may time out, affecting capacity reporting.
-
-  # Committed Resource Syncer Alerts
-  # These alerts only fire when the syncer is enabled (metrics are only registered when enabled).
-  # Absent metrics = syncer disabled = alerts inactive by design.
-  - alert: CortexNovaCommittedResourceSyncerNotRunning
-    expr: increase(cortex_committed_resource_syncer_duration_seconds_count{service="cortex-nova-metrics"}[3h]) < 1
-    for: 15m
-    labels:
-      context: committed-resource-syncer
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource syncer has not run in 3 hours"
-      description: >
-        No commitment sync has completed in the last 3 hours. The syncer runs hourly,
-        so at least 2 runs should appear in this window. Check that the syncer task
-        is healthy and Limes is reachable.
-
-  - alert: CortexNovaCommittedResourceSyncerErrors
-    expr: increase(cortex_committed_resource_syncer_errors_total{service="cortex-nova-metrics"}[1h]) > 3
-    for: 5m
-    labels:
-      context: committed-resource-syncer
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Committed Resource syncer is repeatedly failing"
-      description: >
-        The committed resource syncer has encountered more than 3 errors in the last
-        hour. Check syncer logs for details; common causes are connectivity issues
-        with Limes or failures writing CommittedResource CRDs.
-
   - alert: CortexNovaDoesntFindValidKVMHosts
     expr: sum by (az, hvtype) (increase(cortex_vm_faults{hvtype=~"CH|QEMU",faultmsg=~".*No valid host was found.*"}[5m])) > 0
     for: 5m