diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index d694bdfba..31a203f12 100644 --- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -174,6 +174,22 @@ cortex-scheduling-controllers: enableReportUsage: true # When false, the endpoint returns HTTP 503. enableReportCapacity: true + # Maps flavor group IDs to resource flag configs; "*" acts as catch-all. + # Controls handlesCommitments, hasCapacity, hasQuota per resource type for each group. + flavorGroupResourceConfig: + "*": # catch-all (unknown groups) + ram: + handlesCommitments: false + hasCapacity: true + hasQuota: false + cores: + handlesCommitments: false + hasCapacity: true + hasQuota: false + instances: + handlesCommitments: false + hasCapacity: true + hasQuota: false # OvercommitMappings is a list of mappings that map hypervisor traits to # overcommit ratios. Note that this list is applied in order, so if there # are multiple mappings applying to the same hypervisors, the last mapping diff --git a/internal/scheduling/reservations/commitments/api/change_commitments.go b/internal/scheduling/reservations/commitments/api/change_commitments.go index 9849075b9..b7783b599 100644 --- a/internal/scheduling/reservations/commitments/api/change_commitments.go +++ b/internal/scheduling/reservations/commitments/api/change_commitments.go @@ -189,8 +189,8 @@ ProcessLoop: break ProcessLoop } - if !commitments.FlavorGroupAcceptsCommitments(&flavorGroup) { - failedReason = commitments.FlavorGroupCommitmentRejectionReason(&flavorGroup) + if !api.config.ResourceConfigForGroup(flavorGroupName).RAM.HandlesCommitments { + failedReason = fmt.Sprintf("flavor group %q is not configured to handle commitments", flavorGroupName) rollback = true break ProcessLoop } @@ -247,6 +247,10 @@ ProcessLoop: cr.Name = crName if _, err := controllerutil.CreateOrUpdate(ctx, api.client, cr, func() error { applyCRSpec(cr, stateDesired, allowRejection) + if cr.Annotations == nil { + cr.Annotations = make(map[string]string) + } + cr.Annotations[v1alpha1.AnnotationCreatorRequestID] = reservations.GlobalRequestIDFromContext(ctx) return nil }); err != nil { failedReason = fmt.Sprintf("commitment %s: failed to write CommittedResource CRD: %v", commitment.UUID, err) diff --git a/internal/scheduling/reservations/commitments/api/change_commitments_e2e_test.go b/internal/scheduling/reservations/commitments/api/change_commitments_e2e_test.go index ee546655b..3f19b4857 100644 --- a/internal/scheduling/reservations/commitments/api/change_commitments_e2e_test.go +++ b/internal/scheduling/reservations/commitments/api/change_commitments_e2e_test.go @@ -123,6 +123,9 @@ func newE2EEnv(t *testing.T, flavors []*TestFlavor, infoVersion int64, scheduler cfg := commitments.DefaultAPIConfig() cfg.WatchTimeout = metav1.Duration{Duration: 5 * time.Second} cfg.WatchPollInterval = metav1.Duration{Duration: 100 * time.Millisecond} + cfg.FlavorGroupResourceConfig = map[string]commitments.FlavorGroupResourcesConfig{ + "*": {RAM: commitments.ResourceTypeConfig{HandlesCommitments: true, HasCapacity: true}}, + } api := NewAPIWithConfig(k8sClient, cfg, nil) mux := http.NewServeMux() api.Init(mux, prometheus.NewRegistry(), log.Log) diff --git a/internal/scheduling/reservations/commitments/api/change_commitments_test.go b/internal/scheduling/reservations/commitments/api/change_commitments_test.go index 579173460..a98e840aa 100644 --- a/internal/scheduling/reservations/commitments/api/change_commitments_test.go +++ 
b/internal/scheduling/reservations/commitments/api/change_commitments_test.go @@ -144,6 +144,9 @@ func TestHandleChangeCommitments(t *testing.T) { cfg := commitments.DefaultAPIConfig() cfg.WatchTimeout = metav1.Duration{} cfg.WatchPollInterval = metav1.Duration{Duration: 100 * time.Millisecond} + cfg.FlavorGroupResourceConfig = map[string]commitments.FlavorGroupResourcesConfig{ + "*": {RAM: commitments.ResourceTypeConfig{HandlesCommitments: true, HasCapacity: true}}, + } return &cfg }(), ExpectedAPIResponse: newAPIResponse("timeout reached while processing commitment changes"), @@ -709,7 +712,16 @@ func newCRTestEnv(t *testing.T, tc CommitmentChangeTestCase) *CRTestEnv { if tc.CustomConfig != nil { api = NewAPIWithConfig(wrapped, *tc.CustomConfig, nil) } else { - api = NewAPI(wrapped) + // Default test config: all flavor groups accept RAM commitments via wildcard. + cfg := commitments.DefaultAPIConfig() + cfg.FlavorGroupResourceConfig = map[string]commitments.FlavorGroupResourcesConfig{ + "*": { + RAM: commitments.ResourceTypeConfig{HandlesCommitments: true, HasCapacity: true}, + Cores: commitments.ResourceTypeConfig{HasCapacity: true}, + Instances: commitments.ResourceTypeConfig{HasCapacity: true}, + }, + } + api = NewAPIWithConfig(wrapped, cfg, nil) } mux := http.NewServeMux() registry := prometheus.NewRegistry() diff --git a/internal/scheduling/reservations/commitments/api/info.go b/internal/scheduling/reservations/commitments/api/info.go index 2e8ddc8a8..cd8846101 100644 --- a/internal/scheduling/reservations/commitments/api/info.go +++ b/internal/scheduling/reservations/commitments/api/info.go @@ -113,11 +113,7 @@ func (api *HTTPAPI) buildServiceInfo(ctx context.Context, logger logr.Logger) (l // Build resources map resources := make(map[liquid.ResourceName]liquid.ResourceInfo) for groupName, groupData := range flavorGroups { - // Determine if this group accepts commitments (requires fixed RAM/core ratio) - handlesCommitments := commitments.FlavorGroupAcceptsCommitments(&groupData) - - // All flavor groups are registered for usage reporting. - // Only those with a fixed RAM/core ratio have HandlesCommitments=true. 
+ resCfg := api.config.ResourceConfigForGroup(groupName) flavorNames := make([]string, 0, len(groupData.Flavors)) for _, flavor := range groupData.Flavors { @@ -157,12 +153,12 @@ func (api *HTTPAPI) buildServiceInfo(ctx context.Context, logger logr.Logger) (l groupData.SmallestFlavor.MemoryMB, flavorListStr, ), - Unit: ramUnit, // Non-standard unit: multiples of smallest flavor RAM + Unit: ramUnit, Topology: liquid.AZAwareTopology, NeedsResourceDemand: false, - HasCapacity: true, // We report capacity via /commitments/v1/report-capacity - HasQuota: false, - HandlesCommitments: handlesCommitments, // Only groups with fixed ratio accept commitments + HasCapacity: resCfg.RAM.HasCapacity, + HasQuota: resCfg.RAM.HasQuota, + HandlesCommitments: resCfg.RAM.HandlesCommitments, Attributes: attrsJSON, } @@ -173,13 +169,13 @@ func (api *HTTPAPI) buildServiceInfo(ctx context.Context, logger logr.Logger) (l "CPU cores (usable by: %s)", flavorListStr, ), - Unit: liquid.UnitNone, // Countable unit (omitted in JSON = "1") - Topology: liquid.AZAwareTopology, // Same topology as RAM + Unit: liquid.UnitNone, + Topology: liquid.AZAwareTopology, NeedsResourceDemand: false, - HasCapacity: true, // We report capacity (as 0 for now) - HasQuota: false, // No quota enforcement - HandlesCommitments: false, // Cores are derived from RAM commitments - Attributes: attrsJSON, // Same attributes (ratio info) + HasCapacity: resCfg.Cores.HasCapacity, + HasQuota: resCfg.Cores.HasQuota, + HandlesCommitments: resCfg.Cores.HandlesCommitments, + Attributes: attrsJSON, } // === 3. Instances Resource === @@ -189,13 +185,13 @@ func (api *HTTPAPI) buildServiceInfo(ctx context.Context, logger logr.Logger) (l "instances (usable by: %s)", flavorListStr, ), - Unit: liquid.UnitNone, // Countable unit (omitted in JSON = "1") - Topology: liquid.AZAwareTopology, // Same topology as RAM + Unit: liquid.UnitNone, + Topology: liquid.AZAwareTopology, NeedsResourceDemand: false, - HasCapacity: true, // We report capacity (as 0 for now) - HasQuota: false, // No quota enforcement - HandlesCommitments: false, // Instances are derived from RAM commitments - Attributes: attrsJSON, // Same attributes + HasCapacity: resCfg.Instances.HasCapacity, + HasQuota: resCfg.Instances.HasQuota, + HandlesCommitments: resCfg.Instances.HandlesCommitments, + Attributes: attrsJSON, } logger.V(1).Info("registered flavor group resources", diff --git a/internal/scheduling/reservations/commitments/api/info_test.go b/internal/scheduling/reservations/commitments/api/info_test.go index 48e12fd2c..60426a2aa 100644 --- a/internal/scheduling/reservations/commitments/api/info_test.go +++ b/internal/scheduling/reservations/commitments/api/info_test.go @@ -10,6 +10,7 @@ import ( "testing" "github.com/cobaltcore-dev/cortex/api/v1alpha1" + commitments "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/commitments" "github.com/sapcc/go-api-declarations/liquid" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -137,11 +138,9 @@ func TestHandleInfo_InvalidFlavorMemory(t *testing.T) { } } -func TestHandleInfo_HasCapacityEqualsHandlesCommitments(t *testing.T) { - // Test that ALL flavor groups get resources created: - // - Three resources are created per group: _ram, _cores, _instances - // - Only _ram of groups with FIXED ratio has HandlesCommitments=true - // - All resources have HasCapacity=true +func TestHandleInfo_ResourceFlagsFromConfig(t *testing.T) { + // Test that resource flags (HandlesCommitments, HasCapacity, HasQuota) are read 
from config, + // not derived from flavor group metadata. Both groups get resources regardless of ratio. scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("failed to add scheme: %v", err) @@ -150,8 +149,6 @@ func TestHandleInfo_HasCapacityEqualsHandlesCommitments(t *testing.T) { // Create flavor groups knowledge with both fixed and variable ratio groups features := []map[string]interface{}{ { - // Group with fixed ratio - should accept commitments - // Creates 3 resources: _ram, _cores, _instances "name": "hana_fixed", "flavors": []map[string]interface{}{ {"name": "hana_c4_m16", "vcpus": 4, "memoryMB": 16384, "diskGB": 50}, @@ -159,20 +156,18 @@ func TestHandleInfo_HasCapacityEqualsHandlesCommitments(t *testing.T) { }, "largestFlavor": map[string]interface{}{"name": "hana_c8_m32", "vcpus": 8, "memoryMB": 32768, "diskGB": 100}, "smallestFlavor": map[string]interface{}{"name": "hana_c4_m16", "vcpus": 4, "memoryMB": 16384, "diskGB": 50}, - "ramCoreRatio": 4096, // Fixed: 4096 MiB per vCPU for all flavors + "ramCoreRatio": 4096, }, { - // Group with variable ratio - should NOT accept commitments - // Will be SKIPPED entirely (no resources created) "name": "v2_variable", "flavors": []map[string]interface{}{ - {"name": "v2_c4_m8", "vcpus": 4, "memoryMB": 8192, "diskGB": 50}, // 2048 MiB/vCPU - {"name": "v2_c4_m64", "vcpus": 4, "memoryMB": 65536, "diskGB": 100}, // 16384 MiB/vCPU + {"name": "v2_c4_m8", "vcpus": 4, "memoryMB": 8192, "diskGB": 50}, + {"name": "v2_c4_m64", "vcpus": 4, "memoryMB": 65536, "diskGB": 100}, }, "largestFlavor": map[string]interface{}{"name": "v2_c4_m64", "vcpus": 4, "memoryMB": 65536, "diskGB": 100}, "smallestFlavor": map[string]interface{}{"name": "v2_c4_m8", "vcpus": 4, "memoryMB": 8192, "diskGB": 50}, - "ramCoreRatioMin": 2048, // Variable: min ratio - "ramCoreRatioMax": 16384, // Variable: max ratio + "ramCoreRatioMin": 2048, + "ramCoreRatioMax": 16384, }, } @@ -199,7 +194,21 @@ func TestHandleInfo_HasCapacityEqualsHandlesCommitments(t *testing.T) { WithObjects(knowledge). 
Build() - api := NewAPI(k8sClient) + // hana_fixed: ram accepts commitments; v2_variable: nothing accepts commitments + cfg := commitments.DefaultAPIConfig() + cfg.FlavorGroupResourceConfig = map[string]commitments.FlavorGroupResourcesConfig{ + "hana_fixed": { + RAM: commitments.ResourceTypeConfig{HandlesCommitments: true, HasCapacity: true}, + Cores: commitments.ResourceTypeConfig{HasCapacity: true}, + Instances: commitments.ResourceTypeConfig{HasCapacity: true}, + }, + "*": { + RAM: commitments.ResourceTypeConfig{HasCapacity: true}, + Cores: commitments.ResourceTypeConfig{HasCapacity: true}, + Instances: commitments.ResourceTypeConfig{HasCapacity: true}, + }, + } + api := NewAPIWithConfig(k8sClient, cfg, nil) req := httptest.NewRequest(http.MethodGet, "/commitments/v1/info", http.NoBody) w := httptest.NewRecorder() @@ -217,14 +226,10 @@ func TestHandleInfo_HasCapacityEqualsHandlesCommitments(t *testing.T) { t.Fatalf("failed to decode response: %v", err) } - // Verify we have 6 resources (3 per flavor group, both groups included) - // hana_fixed generates: _ram, _cores, _instances - // v2_variable generates: _ram, _cores, _instances if len(serviceInfo.Resources) != 6 { t.Fatalf("expected 6 resources (3 per flavor group), got %d", len(serviceInfo.Resources)) } - // Test RAM resource: hw_version_hana_fixed_ram ramResource, ok := serviceInfo.Resources["hw_version_hana_fixed_ram"] if !ok { t.Fatal("expected hw_version_hana_fixed_ram resource to exist") @@ -233,10 +238,9 @@ func TestHandleInfo_HasCapacityEqualsHandlesCommitments(t *testing.T) { t.Error("hw_version_hana_fixed_ram: expected HasCapacity=true") } if !ramResource.HandlesCommitments { - t.Error("hw_version_hana_fixed_ram: expected HandlesCommitments=true (RAM is primary commitment resource)") + t.Error("hw_version_hana_fixed_ram: expected HandlesCommitments=true (set in config)") } - // Test Cores resource: hw_version_hana_fixed_cores coresResource, ok := serviceInfo.Resources["hw_version_hana_fixed_cores"] if !ok { t.Fatal("expected hw_version_hana_fixed_cores resource to exist") @@ -245,10 +249,9 @@ func TestHandleInfo_HasCapacityEqualsHandlesCommitments(t *testing.T) { t.Error("hw_version_hana_fixed_cores: expected HasCapacity=true") } if coresResource.HandlesCommitments { - t.Error("hw_version_hana_fixed_cores: expected HandlesCommitments=false (cores are derived)") + t.Error("hw_version_hana_fixed_cores: expected HandlesCommitments=false") } - // Test Instances resource: hw_version_hana_fixed_instances instancesResource, ok := serviceInfo.Resources["hw_version_hana_fixed_instances"] if !ok { t.Fatal("expected hw_version_hana_fixed_instances resource to exist") @@ -257,40 +260,34 @@ func TestHandleInfo_HasCapacityEqualsHandlesCommitments(t *testing.T) { t.Error("hw_version_hana_fixed_instances: expected HasCapacity=true") } if instancesResource.HandlesCommitments { - t.Error("hw_version_hana_fixed_instances: expected HandlesCommitments=false (instances are derived)") + t.Error("hw_version_hana_fixed_instances: expected HandlesCommitments=false") } - // Variable ratio group DOES have resources now, but HandlesCommitments=false for RAM + // v2_variable is covered by "*" wildcard: HasCapacity=true, HandlesCommitments=false v2RamResource, ok := serviceInfo.Resources["hw_version_v2_variable_ram"] if !ok { - t.Fatal("expected hw_version_v2_variable_ram resource to exist (all groups included)") + t.Fatal("expected hw_version_v2_variable_ram resource to exist") } if !v2RamResource.HasCapacity { t.Error("hw_version_v2_variable_ram: 
expected HasCapacity=true") } if v2RamResource.HandlesCommitments { - t.Error("hw_version_v2_variable_ram: expected HandlesCommitments=false (variable ratio)") + t.Error("hw_version_v2_variable_ram: expected HandlesCommitments=false (not in config)") } v2CoresResource, ok := serviceInfo.Resources["hw_version_v2_variable_cores"] if !ok { - t.Fatal("expected hw_version_v2_variable_cores resource to exist (all groups included)") + t.Fatal("expected hw_version_v2_variable_cores resource to exist") } if !v2CoresResource.HasCapacity { t.Error("hw_version_v2_variable_cores: expected HasCapacity=true") } - if v2CoresResource.HandlesCommitments { - t.Error("hw_version_v2_variable_cores: expected HandlesCommitments=false") - } v2InstancesResource, ok := serviceInfo.Resources["hw_version_v2_variable_instances"] if !ok { - t.Fatal("expected hw_version_v2_variable_instances resource to exist (all groups included)") + t.Fatal("expected hw_version_v2_variable_instances resource to exist") } if !v2InstancesResource.HasCapacity { t.Error("hw_version_v2_variable_instances: expected HasCapacity=true") } - if v2InstancesResource.HandlesCommitments { - t.Error("hw_version_v2_variable_instances: expected HandlesCommitments=false") - } } diff --git a/internal/scheduling/reservations/commitments/committed_resource_controller.go b/internal/scheduling/reservations/commitments/committed_resource_controller.go index 0481395fc..2389440e3 100644 --- a/internal/scheduling/reservations/commitments/committed_resource_controller.go +++ b/internal/scheduling/reservations/commitments/committed_resource_controller.go @@ -38,7 +38,11 @@ func (r *CommittedResourceController) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{}, client.IgnoreNotFound(err) } - ctx = WithNewGlobalRequestID(ctx) + if creatorReq := cr.Annotations[v1alpha1.AnnotationCreatorRequestID]; creatorReq != "" { + ctx = WithGlobalRequestID(ctx, creatorReq) + } else { + ctx = WithNewGlobalRequestID(ctx) + } logger := LoggerFromContext(ctx).WithValues( "component", "committed-resource-controller", "committedResource", req.Name, diff --git a/internal/scheduling/reservations/commitments/committed_resource_controller_test.go b/internal/scheduling/reservations/commitments/committed_resource_controller_test.go index 471c013e3..1029ec997 100644 --- a/internal/scheduling/reservations/commitments/committed_resource_controller_test.go +++ b/internal/scheduling/reservations/commitments/committed_resource_controller_test.go @@ -360,13 +360,6 @@ func TestCommittedResourceController_PlacementFailure(t *testing.T) { expectedReason: "Reserving", expectRequeue: true, }, - { - name: "guaranteed AllowRejection=true: rejects on failure, no retry", - state: v1alpha1.CommitmentStatusGuaranteed, - allowRejection: true, - expectedReason: "Rejected", - expectRequeue: false, - }, { name: "confirmed AllowRejection=true: rejects on failure, no retry", state: v1alpha1.CommitmentStatusConfirmed, @@ -374,13 +367,6 @@ func TestCommittedResourceController_PlacementFailure(t *testing.T) { expectedReason: "Rejected", expectRequeue: false, }, - { - name: "guaranteed AllowRejection=false: retries on failure", - state: v1alpha1.CommitmentStatusGuaranteed, - allowRejection: false, - expectedReason: "Reserving", - expectRequeue: true, - }, { name: "confirmed AllowRejection=false: retries on failure", state: v1alpha1.CommitmentStatusConfirmed, @@ -503,31 +489,6 @@ func TestCommittedResourceController_BadSpec(t *testing.T) { } } -func TestCommittedResourceController_Idempotent(t *testing.T) 
{ - scheme := newCRTestScheme(t) - cr := newTestCommittedResource("test-cr", v1alpha1.CommitmentStatusConfirmed) - k8sClient := newCRTestClient(scheme, cr, newTestFlavorKnowledge()) - controller := &CommittedResourceController{Client: k8sClient, Scheme: scheme, Conf: CommittedResourceControllerConfig{}} - - // Round 1: creates reservation, waits for placement. - if _, err := controller.Reconcile(context.Background(), reconcileReq(cr.Name)); err != nil { - t.Fatalf("reconcile 1: %v", err) - } - // Simulate reservation controller setting Ready=True. - setChildReservationsReady(t, k8sClient, cr.Spec.CommitmentUUID) - // Rounds 2 and 3: accepts, then stays accepted. - for i := 2; i <= 3; i++ { - if _, err := controller.Reconcile(context.Background(), reconcileReq(cr.Name)); err != nil { - t.Fatalf("reconcile %d: %v", i, err) - } - } - - if got := countChildReservations(t, k8sClient, cr.Spec.CommitmentUUID); got != 1 { - t.Errorf("expected 1 child reservation after 3 reconciles (idempotency), got %d", got) - } - assertCondition(t, k8sClient, cr.Name, metav1.ConditionTrue, "Accepted") -} - // ============================================================================ // Tests: checkChildReservationStatus generation guard // ============================================================================ diff --git a/internal/scheduling/reservations/commitments/committed_resource_integration_test.go b/internal/scheduling/reservations/commitments/committed_resource_integration_test.go deleted file mode 100644 index 0090e45f5..000000000 --- a/internal/scheduling/reservations/commitments/committed_resource_integration_test.go +++ /dev/null @@ -1,633 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package commitments - -// Integration tests for the CR lifecycle spanning CommittedResourceController and -// CommitmentReservationController. These tests drive both controllers against a shared -// fake client and verify the end-to-end state transitions without mocking internal logic. -// -// Scope: -// - State transition: planned → confirmed produces child Reservations -// - State transition: confirmed → expired cleans up child Reservations -// - Reservation controller places a child Reservation created by the CR controller -// - CR deletion removes all child Reservations - -import ( - "context" - "encoding/json" - "net/http" - "net/http/httptest" - "strings" - "testing" - "time" - - hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" - "k8s.io/apimachinery/pkg/api/meta" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - - schedulerdelegationapi "github.com/cobaltcore-dev/cortex/api/external/nova" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" -) - -// crIntegrationEnv holds shared state for integration tests. -type crIntegrationEnv struct { - k8sClient client.Client - crController *CommittedResourceController - resController *CommitmentReservationController - schedulerServer *httptest.Server -} - -func newCRIntegrationEnv(t *testing.T) *crIntegrationEnv { - t.Helper() - scheme := newCRTestScheme(t) - - hypervisor := &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "host-1"}} - k8sClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(newTestFlavorKnowledge(), hypervisor). 
- WithStatusSubresource( - &v1alpha1.CommittedResource{}, - &v1alpha1.Reservation{}, - &v1alpha1.Knowledge{}, - ). - WithIndex(&v1alpha1.Reservation{}, idxReservationByCommitmentUUID, func(obj client.Object) []string { - res, ok := obj.(*v1alpha1.Reservation) - if !ok || res.Spec.CommittedResourceReservation == nil || res.Spec.CommittedResourceReservation.CommitmentUUID == "" { - return nil - } - return []string{res.Spec.CommittedResourceReservation.CommitmentUUID} - }). - WithIndex(&v1alpha1.CommittedResource{}, idxCommittedResourceByUUID, func(obj client.Object) []string { - cr, ok := obj.(*v1alpha1.CommittedResource) - if !ok || cr.Spec.CommitmentUUID == "" { - return nil - } - return []string{cr.Spec.CommitmentUUID} - }). - Build() - - schedulerServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - resp := &schedulerdelegationapi.ExternalSchedulerResponse{Hosts: []string{"host-1"}} - if err := json.NewEncoder(w).Encode(resp); err != nil { - t.Errorf("scheduler encode: %v", err) - } - })) - - crCtrl := &CommittedResourceController{ - Client: k8sClient, - Scheme: scheme, - Conf: CommittedResourceControllerConfig{RequeueIntervalRetry: metav1.Duration{Duration: 5 * time.Minute}}, - } - - resCtrl := &CommitmentReservationController{ - Client: k8sClient, - Scheme: scheme, - Conf: ReservationControllerConfig{ - SchedulerURL: schedulerServer.URL, - AllocationGracePeriod: metav1.Duration{Duration: 15 * time.Minute}, - RequeueIntervalActive: metav1.Duration{Duration: 5 * time.Minute}, - }, - } - if err := resCtrl.Init(context.Background(), resCtrl.Conf); err != nil { - t.Fatalf("resCtrl.Init: %v", err) - } - - return &crIntegrationEnv{ - k8sClient: k8sClient, - crController: crCtrl, - resController: resCtrl, - schedulerServer: schedulerServer, - } -} - -func (e *crIntegrationEnv) close() { e.schedulerServer.Close() } - -func (e *crIntegrationEnv) reconcileCR(t *testing.T, crName string) { - t.Helper() - req := ctrl.Request{NamespacedName: types.NamespacedName{Name: crName}} - if _, err := e.crController.Reconcile(context.Background(), req); err != nil { - t.Fatalf("CR reconcile: %v", err) - } -} - -func (e *crIntegrationEnv) reconcileReservation(t *testing.T, resName string) { - t.Helper() - req := ctrl.Request{NamespacedName: types.NamespacedName{Name: resName}} - if _, err := e.resController.Reconcile(context.Background(), req); err != nil { - t.Fatalf("reservation reconcile %s: %v", resName, err) - } -} - -func (e *crIntegrationEnv) listChildReservations(t *testing.T, crName string) []v1alpha1.Reservation { - t.Helper() - var list v1alpha1.ReservationList - if err := e.k8sClient.List(context.Background(), &list, client.MatchingLabels{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }); err != nil { - t.Fatalf("list reservations: %v", err) - } - prefix := crName + "-" - var children []v1alpha1.Reservation - for _, r := range list.Items { - if strings.HasPrefix(r.Name, prefix) { - children = append(children, r) - } - } - return children -} - -func (e *crIntegrationEnv) getCR(t *testing.T, name string) v1alpha1.CommittedResource { - t.Helper() - var cr v1alpha1.CommittedResource - if err := e.k8sClient.Get(context.Background(), types.NamespacedName{Name: name}, &cr); err != nil { - t.Fatalf("get CR %s: %v", name, err) - } - return cr -} - -// reconcileChildReservations runs the reservation controller twice on every child Reservation -// for crName (first reconcile sets TargetHost, second sets Ready=True), then 
re-reconciles -// the CR so it can observe the placement outcomes. -func (e *crIntegrationEnv) reconcileChildReservations(t *testing.T, crName string) { - t.Helper() - for _, res := range e.listChildReservations(t, crName) { - e.reconcileReservation(t, res.Name) // calls scheduler → sets TargetHost - e.reconcileReservation(t, res.Name) // syncs TargetHost to Status → Ready=True - } - e.reconcileCR(t, crName) -} - -// ============================================================================ -// Integration tests -// ============================================================================ - -// TestCRLifecycle covers the multi-step state transitions that require imperative -// mid-test patches and cannot be expressed as a purely declarative table. -func TestCRLifecycle(t *testing.T) { - t.Run("planned→confirmed: child Reservations created and placed", func(t *testing.T) { - env := newCRIntegrationEnv(t) - defer env.close() - - cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusPlanned) - if err := env.k8sClient.Create(context.Background(), cr); err != nil { - t.Fatalf("create CR: %v", err) - } - - // Reconcile as planned: finalizer added, no Reservations. - env.reconcileCR(t, cr.Name) - env.reconcileCR(t, cr.Name) - if got := env.listChildReservations(t, cr.Name); len(got) != 0 { - t.Fatalf("planned: expected 0 reservations, got %d", len(got)) - } - crState := env.getCR(t, cr.Name) - cond := meta.FindStatusCondition(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) - if cond == nil || cond.Reason != "Planned" { - t.Errorf("planned: expected Reason=Planned, got %v", cond) - } - - // Transition to confirmed. - patch := client.MergeFrom(crState.DeepCopy()) - crState.Spec.State = v1alpha1.CommitmentStatusConfirmed - if err := env.k8sClient.Patch(context.Background(), &crState, patch); err != nil { - t.Fatalf("patch state to confirmed: %v", err) - } - env.reconcileCR(t, cr.Name) - - children := env.listChildReservations(t, cr.Name) - if len(children) != 1 { - t.Fatalf("confirmed: expected 1 reservation, got %d", len(children)) - } - env.reconcileChildReservations(t, cr.Name) - - crState = env.getCR(t, cr.Name) - if !meta.IsStatusConditionTrue(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) { - t.Errorf("confirmed: expected Ready=True") - } - }) - - t.Run("confirmed→expired: child Reservations deleted, CR marked inactive", func(t *testing.T) { - env := newCRIntegrationEnv(t) - defer env.close() - - cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) - if err := env.k8sClient.Create(context.Background(), cr); err != nil { - t.Fatalf("create CR: %v", err) - } - - // Bring to confirmed+Ready=True. - env.reconcileCR(t, cr.Name) // adds finalizer - env.reconcileCR(t, cr.Name) // creates Reservations - env.reconcileChildReservations(t, cr.Name) // places slots → Ready=True - - if got := env.listChildReservations(t, cr.Name); len(got) != 1 { - t.Fatalf("pre-expire: expected 1 reservation, got %d", len(got)) - } - - // Transition to expired. 
- crState := env.getCR(t, cr.Name) - patch := client.MergeFrom(crState.DeepCopy()) - crState.Spec.State = v1alpha1.CommitmentStatusExpired - if err := env.k8sClient.Patch(context.Background(), &crState, patch); err != nil { - t.Fatalf("patch state to expired: %v", err) - } - env.reconcileCR(t, cr.Name) - - if got := env.listChildReservations(t, cr.Name); len(got) != 0 { - t.Errorf("expired: expected 0 reservations, got %d", len(got)) - } - crState = env.getCR(t, cr.Name) - cond := meta.FindStatusCondition(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) - if cond == nil || cond.Status != metav1.ConditionFalse { - t.Errorf("expired: expected Ready=False, got %v", cond) - } - if cond != nil && cond.Reason != string(v1alpha1.CommitmentStatusExpired) { - t.Errorf("expired: expected Reason=%s, got %s", v1alpha1.CommitmentStatusExpired, cond.Reason) - } - }) - - t.Run("reservation placement: two reconciles set TargetHost then Ready=True", func(t *testing.T) { - env := newCRIntegrationEnv(t) - defer env.close() - - cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) - if err := env.k8sClient.Create(context.Background(), cr); err != nil { - t.Fatalf("create CR: %v", err) - } - - env.reconcileCR(t, cr.Name) - env.reconcileCR(t, cr.Name) - - children := env.listChildReservations(t, cr.Name) - if len(children) != 1 { - t.Fatalf("expected 1 child reservation, got %d", len(children)) - } - child := children[0] - - // First reconcile: scheduler call → TargetHost written to Spec. - env.reconcileReservation(t, child.Name) - var afterFirst v1alpha1.Reservation - if err := env.k8sClient.Get(context.Background(), types.NamespacedName{Name: child.Name}, &afterFirst); err != nil { - t.Fatalf("get reservation after first reconcile: %v", err) - } - if afterFirst.Spec.TargetHost == "" { - t.Fatalf("expected TargetHost set after first reservation reconcile") - } - - // Second reconcile: TargetHost synced to Status, Ready=True. - env.reconcileReservation(t, child.Name) - var afterSecond v1alpha1.Reservation - if err := env.k8sClient.Get(context.Background(), types.NamespacedName{Name: child.Name}, &afterSecond); err != nil { - t.Fatalf("get reservation after second reconcile: %v", err) - } - if !meta.IsStatusConditionTrue(afterSecond.Status.Conditions, v1alpha1.ReservationConditionReady) { - t.Errorf("expected reservation Ready=True after placement, got %v", afterSecond.Status.Conditions) - } - if afterSecond.Status.Host != "host-1" { - t.Errorf("expected Status.Host=host-1, got %q", afterSecond.Status.Host) - } - }) - - t.Run("deletion: finalizer removed, child Reservations cleaned up", func(t *testing.T) { - env := newCRIntegrationEnv(t) - defer env.close() - - cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) - if err := env.k8sClient.Create(context.Background(), cr); err != nil { - t.Fatalf("create CR: %v", err) - } - - // Pre-create a child Reservation to verify it gets cleaned up on deletion. - // newTestCommittedResource pre-populates the finalizer, so Delete() immediately sets DeletionTimestamp. 
- child := &v1alpha1.Reservation{ - ObjectMeta: metav1.ObjectMeta{ - Name: "my-cr-0", - Labels: map[string]string{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }, - }, - Spec: v1alpha1.ReservationSpec{ - Type: v1alpha1.ReservationTypeCommittedResource, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - CommitmentUUID: "test-uuid-1234", - }, - }, - } - if err := env.k8sClient.Create(context.Background(), child); err != nil { - t.Fatalf("create child reservation: %v", err) - } - - crState := env.getCR(t, cr.Name) - if err := env.k8sClient.Delete(context.Background(), &crState); err != nil { - t.Fatalf("delete CR: %v", err) - } - env.reconcileCR(t, cr.Name) - - if got := env.listChildReservations(t, cr.Name); len(got) != 0 { - t.Errorf("post-deletion: expected 0 reservations, got %d", len(got)) - } - var final v1alpha1.CommittedResource - err := env.k8sClient.Get(context.Background(), types.NamespacedName{Name: cr.Name}, &final) - if client.IgnoreNotFound(err) != nil { - t.Fatalf("unexpected error after deletion: %v", err) - } - if err == nil { - for _, f := range final.Finalizers { - if f == crFinalizer { - t.Errorf("finalizer not removed after deletion reconcile") - } - } - } - }) - - t.Run("confirmed→superseded: child Reservations deleted, CR marked inactive", func(t *testing.T) { - env := newCRIntegrationEnv(t) - defer env.close() - - cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) - if err := env.k8sClient.Create(context.Background(), cr); err != nil { - t.Fatalf("create CR: %v", err) - } - - env.reconcileCR(t, cr.Name) - env.reconcileCR(t, cr.Name) - env.reconcileChildReservations(t, cr.Name) - - if got := env.listChildReservations(t, cr.Name); len(got) != 1 { - t.Fatalf("pre-supersede: expected 1 reservation, got %d", len(got)) - } - - crState := env.getCR(t, cr.Name) - patch := client.MergeFrom(crState.DeepCopy()) - crState.Spec.State = v1alpha1.CommitmentStatusSuperseded - if err := env.k8sClient.Patch(context.Background(), &crState, patch); err != nil { - t.Fatalf("patch state to superseded: %v", err) - } - env.reconcileCR(t, cr.Name) - - if got := env.listChildReservations(t, cr.Name); len(got) != 0 { - t.Errorf("superseded: expected 0 reservations, got %d", len(got)) - } - crState = env.getCR(t, cr.Name) - cond := meta.FindStatusCondition(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) - if cond == nil || cond.Status != metav1.ConditionFalse { - t.Errorf("superseded: expected Ready=False, got %v", cond) - } - if cond != nil && cond.Reason != string(v1alpha1.CommitmentStatusSuperseded) { - t.Errorf("superseded: expected Reason=%s, got %s", v1alpha1.CommitmentStatusSuperseded, cond.Reason) - } - }) - - t.Run("idempotency: extra reconciles after Accepted do not create extra slots", func(t *testing.T) { - env := newCRIntegrationEnv(t) - defer env.close() - - cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) - if err := env.k8sClient.Create(context.Background(), cr); err != nil { - t.Fatalf("create CR: %v", err) - } - - env.reconcileCR(t, cr.Name) - env.reconcileCR(t, cr.Name) - env.reconcileChildReservations(t, cr.Name) - - if got := env.listChildReservations(t, cr.Name); len(got) != 1 { - t.Fatalf("pre-idempotency check: expected 1 reservation, got %d", len(got)) - } - - env.reconcileCR(t, cr.Name) - env.reconcileCR(t, cr.Name) - - if got := env.listChildReservations(t, cr.Name); len(got) != 1 { - t.Errorf("idempotency: expected 1 reservation 
after extra reconciles, got %d", len(got)) - } - crState := env.getCR(t, cr.Name) - if !meta.IsStatusConditionTrue(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) { - t.Errorf("idempotency: expected CR to remain Ready=True after extra reconciles") - } - }) - - t.Run("AllowRejection=false: stays Reserving when scheduler rejects", func(t *testing.T) { - hypervisor := &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "host-1"}} - env := newIntgEnv(t, []client.Object{newTestFlavorKnowledge(), hypervisor}, intgRejectScheduler) - defer env.close() - - cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) - // AllowRejection stays false (the default), so placement failure must requeue, not reject. - if err := env.k8sClient.Create(context.Background(), cr); err != nil { - t.Fatalf("create CR: %v", err) - } - - ctx := context.Background() - crReq := ctrl.Request{NamespacedName: types.NamespacedName{Name: cr.Name}} - for range 3 { - env.crController.Reconcile(ctx, crReq) //nolint:errcheck - var resList v1alpha1.ReservationList - env.k8sClient.List(ctx, &resList, client.MatchingLabels{ //nolint:errcheck - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }) - for _, res := range resList.Items { - resReq := ctrl.Request{NamespacedName: types.NamespacedName{Name: res.Name}} - env.resController.Reconcile(ctx, resReq) //nolint:errcheck - env.resController.Reconcile(ctx, resReq) //nolint:errcheck - } - env.crController.Reconcile(ctx, crReq) //nolint:errcheck - } - - var final v1alpha1.CommittedResource - if err := env.k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name}, &final); err != nil { - t.Fatalf("get CR: %v", err) - } - cond := meta.FindStatusCondition(final.Status.Conditions, v1alpha1.CommittedResourceConditionReady) - if cond == nil { - t.Fatalf("no Ready condition") - } - if cond.Reason == v1alpha1.CommittedResourceReasonRejected { - t.Errorf("AllowRejection=false: CR must not transition to Rejected, got Reason=%s", cond.Reason) - } - if cond.Reason != v1alpha1.CommittedResourceReasonReserving { - t.Errorf("AllowRejection=false: expected Reason=Reserving, got %s", cond.Reason) - } - }) - - t.Run("externally deleted child Reservation is recreated by CR controller", func(t *testing.T) { - env := newCRIntegrationEnv(t) - defer env.close() - - cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) - if err := env.k8sClient.Create(context.Background(), cr); err != nil { - t.Fatalf("create CR: %v", err) - } - - env.reconcileCR(t, cr.Name) - env.reconcileCR(t, cr.Name) - env.reconcileChildReservations(t, cr.Name) - - children := env.listChildReservations(t, cr.Name) - if len(children) != 1 { - t.Fatalf("expected 1 child reservation before deletion, got %d", len(children)) - } - - // Simulate out-of-band deletion of the slot. - child := children[0] - if err := env.k8sClient.Delete(context.Background(), &child); err != nil { - t.Fatalf("delete child reservation: %v", err) - } - - // CR controller detects the missing slot and recreates it. - env.reconcileCR(t, cr.Name) - // Place the new slot. - env.reconcileChildReservations(t, cr.Name) - // CR controller observes Ready=True on the recreated slot. 
- env.reconcileCR(t, cr.Name) - - if got := env.listChildReservations(t, cr.Name); len(got) != 1 { - t.Errorf("expected 1 reservation after recreation, got %d", len(got)) - } - crState := env.getCR(t, cr.Name) - if !meta.IsStatusConditionTrue(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) { - t.Errorf("expected CR to be Ready=True after slot recreation") - } - }) - - t.Run("AcceptedAt: set when CR accepted", func(t *testing.T) { - env := newCRIntegrationEnv(t) - defer env.close() - - cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) - if err := env.k8sClient.Create(context.Background(), cr); err != nil { - t.Fatalf("create CR: %v", err) - } - - env.reconcileCR(t, cr.Name) - env.reconcileCR(t, cr.Name) - env.reconcileChildReservations(t, cr.Name) - - crState := env.getCR(t, cr.Name) - if !meta.IsStatusConditionTrue(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) { - t.Fatalf("expected CR to be Ready=True") - } - if crState.Status.AcceptedAt == nil { - t.Errorf("expected AcceptedAt to be set on acceptance") - } - if crState.Status.AcceptedAmount == nil { - t.Errorf("expected AcceptedAmount to be set on acceptance") - } else if crState.Status.AcceptedAmount.Cmp(resource.MustParse("4Gi")) != 0 { - t.Errorf("AcceptedAmount: want 4Gi, got %s", crState.Status.AcceptedAmount.String()) - } - }) - - t.Run("resize failure: rolls back to AcceptedAmount, prior slot preserved", func(t *testing.T) { - // Scheduler: accepts the first placement call (initial 4 GiB slot), rejects all subsequent. - objects := []client.Object{newTestFlavorKnowledge(), intgHypervisor("host-1")} - env := newIntgEnv(t, objects, intgAcceptFirstScheduler(1)) - defer env.close() - - cr := intgCRAllowRejection("my-cr", "uuid-resize-0001", v1alpha1.CommitmentStatusConfirmed) - if err := env.k8sClient.Create(context.Background(), cr); err != nil { - t.Fatalf("create CR: %v", err) - } - - // Phase 1: accept at 4 GiB (1 slot). Uses 1 scheduler call. - intgDriveToTerminal(t, env, []string{cr.Name}) - var crState v1alpha1.CommittedResource - if err := env.k8sClient.Get(context.Background(), types.NamespacedName{Name: cr.Name}, &crState); err != nil { - t.Fatalf("get CR: %v", err) - } - if !meta.IsStatusConditionTrue(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) { - t.Fatalf("phase 1: expected CR to be Ready=True after initial placement") - } - if crState.Status.AcceptedAmount == nil || crState.Status.AcceptedAmount.Cmp(resource.MustParse("4Gi")) != 0 { - t.Fatalf("phase 1: AcceptedAmount must be 4Gi, got %v", crState.Status.AcceptedAmount) - } - - // Phase 2: resize to 8 GiB (needs 2 slots). Scheduler has no more accepts. - patch := client.MergeFrom(crState.DeepCopy()) - crState.Spec.Amount = resource.MustParse("8Gi") - if err := env.k8sClient.Patch(context.Background(), &crState, patch); err != nil { - t.Fatalf("patch CR to 8Gi: %v", err) - } - - ctx := context.Background() - crReq := ctrl.Request{NamespacedName: types.NamespacedName{Name: cr.Name}} - - // CR controller: applyReservationState bumps gen on existing slot, creates 2nd slot. - env.crController.Reconcile(ctx, crReq) //nolint:errcheck - // Reservation controller: existing slot echoes new ParentGeneration (no scheduler call); - // new slot calls scheduler → rejected. 
- var resList v1alpha1.ReservationList - env.k8sClient.List(ctx, &resList, client.MatchingLabels{ //nolint:errcheck - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }) - for _, res := range resList.Items { - resReq := ctrl.Request{NamespacedName: types.NamespacedName{Name: res.Name}} - env.resController.Reconcile(ctx, resReq) //nolint:errcheck - env.resController.Reconcile(ctx, resReq) //nolint:errcheck - } - // CR controller: detects 2nd slot Ready=False → rollbackToAccepted (keeps 1 slot) → Rejected. - env.crController.Reconcile(ctx, crReq) //nolint:errcheck - - // Rollback must preserve 1 slot (matching AcceptedAmount=4Gi), not delete all. - var finalList v1alpha1.ReservationList - if err := env.k8sClient.List(ctx, &finalList, client.MatchingLabels{ - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }); err != nil { - t.Fatalf("list reservations: %v", err) - } - if len(finalList.Items) != 1 { - t.Errorf("resize rollback: want 1 slot (AcceptedAmount), got %d", len(finalList.Items)) - } - intgAssertCRCondition(t, env.k8sClient, []string{cr.Name}, metav1.ConditionFalse, v1alpha1.CommittedResourceReasonRejected) - }) - - t.Run("AllowRejection=false: eventually accepted after scheduler starts accepting", func(t *testing.T) { - // Scheduler rejects the first 2 calls (one per reservation controller reconcile pair), - // then accepts all subsequent. AllowRejection=false means the CR controller retries rather - // than rejecting, so the CR must eventually reach Accepted once the scheduler cooperates. - objects := []client.Object{newTestFlavorKnowledge(), intgHypervisor("host-1")} - env := newIntgEnv(t, objects, intgRejectFirstScheduler(2)) - defer env.close() - - cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) - // AllowRejection stays false (default), so placement failure must requeue, not reject. 
- if err := env.k8sClient.Create(context.Background(), cr); err != nil { - t.Fatalf("create CR: %v", err) - } - - ctx := context.Background() - crReq := ctrl.Request{NamespacedName: types.NamespacedName{Name: cr.Name}} - for range 3 { - env.crController.Reconcile(ctx, crReq) //nolint:errcheck - var resList v1alpha1.ReservationList - env.k8sClient.List(ctx, &resList, client.MatchingLabels{ //nolint:errcheck - v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, - }) - for _, res := range resList.Items { - resReq := ctrl.Request{NamespacedName: types.NamespacedName{Name: res.Name}} - env.resController.Reconcile(ctx, resReq) //nolint:errcheck - env.resController.Reconcile(ctx, resReq) //nolint:errcheck - } - env.crController.Reconcile(ctx, crReq) //nolint:errcheck - } - - var final v1alpha1.CommittedResource - if err := env.k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name}, &final); err != nil { - t.Fatalf("get CR: %v", err) - } - cond := meta.FindStatusCondition(final.Status.Conditions, v1alpha1.CommittedResourceConditionReady) - if cond == nil { - t.Fatalf("no Ready condition after retries") - } - if cond.Reason == v1alpha1.CommittedResourceReasonRejected { - t.Errorf("AllowRejection=false: CR must not be Rejected, got Reason=%s", cond.Reason) - } - if cond.Status != metav1.ConditionTrue || cond.Reason != v1alpha1.CommittedResourceReasonAccepted { - t.Errorf("AllowRejection=false: expected Ready=True/Accepted after retries, got Ready=%s/Reason=%s", cond.Status, cond.Reason) - } - }) -} diff --git a/internal/scheduling/reservations/commitments/config.go b/internal/scheduling/reservations/commitments/config.go index fe05fcc20..c30c87953 100644 --- a/internal/scheduling/reservations/commitments/config.go +++ b/internal/scheduling/reservations/commitments/config.go @@ -49,6 +49,20 @@ type CommittedResourceControllerConfig struct { RequeueIntervalRetry metav1.Duration `json:"requeueIntervalRetry"` } +// ResourceTypeConfig holds per-resource flags for a single resource type within a flavor group. +type ResourceTypeConfig struct { + HandlesCommitments bool `json:"handlesCommitments"` + HasCapacity bool `json:"hasCapacity"` + HasQuota bool `json:"hasQuota"` +} + +// FlavorGroupResourcesConfig groups resource type configs for the three resources of a flavor group. +type FlavorGroupResourcesConfig struct { + RAM ResourceTypeConfig `json:"ram"` + Cores ResourceTypeConfig `json:"cores"` + Instances ResourceTypeConfig `json:"instances"` +} + // APIConfig holds configuration for the LIQUID commitment HTTP endpoints. type APIConfig struct { // EnableChangeCommitments controls whether the change-commitments endpoint is active. @@ -64,6 +78,22 @@ type APIConfig struct { // WatchPollInterval is how frequently the change-commitments handler polls // CommittedResource CRD conditions while waiting for the controller outcome. WatchPollInterval metav1.Duration `json:"watchPollInterval"` + // FlavorGroupResourceConfig maps flavor group IDs to resource flag configs; "*" acts as catch-all. + FlavorGroupResourceConfig map[string]FlavorGroupResourcesConfig `json:"flavorGroupResourceConfig,omitempty"` +} + +// ResourceConfigForGroup returns the resource config for the given flavor group ID, +// falling back to the "*" catch-all if no exact match exists. 
+func (c APIConfig) ResourceConfigForGroup(groupID string) FlavorGroupResourcesConfig { + if c.FlavorGroupResourceConfig != nil { + if cfg, ok := c.FlavorGroupResourceConfig[groupID]; ok { + return cfg + } + if cfg, ok := c.FlavorGroupResourceConfig["*"]; ok { + return cfg + } + } + return FlavorGroupResourcesConfig{} } func DefaultAPIConfig() APIConfig { diff --git a/internal/scheduling/reservations/commitments/flavor_group_eligibility.go b/internal/scheduling/reservations/commitments/flavor_group_eligibility.go deleted file mode 100644 index 00218835f..000000000 --- a/internal/scheduling/reservations/commitments/flavor_group_eligibility.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package commitments - -import ( - "fmt" - - "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" -) - -// FlavorGroupAcceptsCommitments returns true if the given flavor group can accept committed resources. -// Currently, only groups with a fixed RAM/core ratio (all flavors have the same ratio) accept CRs. -// This is the single source of truth for CR eligibility and should be used across all CR APIs. -func FlavorGroupAcceptsCommitments(fg *compute.FlavorGroupFeature) bool { - return fg.HasFixedRamCoreRatio() -} - -// FlavorGroupCommitmentRejectionReason returns the reason why the given flavor group does not accept CRs. -// Returns empty string if the group accepts commitments. -func FlavorGroupCommitmentRejectionReason(fg *compute.FlavorGroupFeature) string { - if FlavorGroupAcceptsCommitments(fg) { - return "" - } - // Differentiate between missing ratio metadata and variable ratio - if fg.RamCoreRatioMin == nil && fg.RamCoreRatioMax == nil { - return fmt.Sprintf("flavor group %q has no computable RAM/core ratio metadata and does not accept commitments", fg.Name) - } - return fmt.Sprintf("flavor group %q has variable RAM/core ratio (min=%d, max=%d) and does not accept commitments", - fg.Name, *fg.RamCoreRatioMin, *fg.RamCoreRatioMax) -} diff --git a/internal/scheduling/reservations/commitments/integration_test.go b/internal/scheduling/reservations/commitments/integration_test.go index 138f3c74c..e89e2adb1 100644 --- a/internal/scheduling/reservations/commitments/integration_test.go +++ b/internal/scheduling/reservations/commitments/integration_test.go @@ -3,11 +3,12 @@ package commitments -// Table-driven integration tests for the committed-resource lifecycle. +// Integration tests for the committed-resource lifecycle. // -// Each test case wires CommittedResourceController and CommitmentReservationController -// against a shared fake k8s client and a mock Nova scheduler, then drives both -// controllers synchronously until every CR reaches a terminal condition. 
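Two properties of the `ResourceConfigForGroup` lookup added in config.go above are worth calling out for reviewers: an exact group entry fully shadows the `"*"` wildcard (flags are not merged field by field), and a group matched by neither resolves to the zero value, i.e. all flags false. A minimal sketch of the resolution behavior, mirroring the Helm values shape from the bundle (group names are illustrative; this snippet is review commentary, not part of the patch):

```go
package main

import (
	"fmt"

	commitments "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/commitments"
)

func main() {
	cfg := commitments.DefaultAPIConfig()
	cfg.FlavorGroupResourceConfig = map[string]commitments.FlavorGroupResourcesConfig{
		// Exact entry: fully shadows "*" for this group, even for fields left unset here.
		"hana_fixed": {RAM: commitments.ResourceTypeConfig{HandlesCommitments: true, HasCapacity: true}},
		// Wildcard: catch-all for every group without an exact entry.
		"*": {RAM: commitments.ResourceTypeConfig{HasCapacity: true}},
	}

	fmt.Println(cfg.ResourceConfigForGroup("hana_fixed").RAM.HandlesCommitments)  // true (exact match)
	fmt.Println(cfg.ResourceConfigForGroup("v2_variable").RAM.HandlesCommitments) // false (falls back to "*")
	fmt.Println(cfg.ResourceConfigForGroup("v2_variable").RAM.HasCapacity)        // true (from "*")
}
```

The shadowing matters for operators: overriding a single group in values.yaml means restating all three resource types for that group, since the wildcard is not consulted once an exact key matches.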
+// Both test suites wire CommittedResourceController and CommitmentReservationController +// against a shared fake k8s client and a mock Nova scheduler: +// - TestCRIntegration — table-driven declarative scenarios +// - TestCRLifecycle — imperative sub-tests for multi-step transitions // // Terminal conditions (no further reconcile expected without external input): // - Ready=True / Accepted @@ -21,6 +22,7 @@ import ( "encoding/json" "net/http" "net/http/httptest" + "strings" "sync/atomic" "testing" "time" @@ -383,6 +385,67 @@ func newIntgEnv(t *testing.T, initialObjects []client.Object, schedulerFn http.H func (e *intgEnv) close() { e.schedulerSrv.Close() } +func newDefaultIntgEnv(t *testing.T) *intgEnv { + t.Helper() + objects := []client.Object{newTestFlavorKnowledge(), intgHypervisor("host-1")} + return newIntgEnv(t, objects, intgAcceptScheduler) +} + +func (e *intgEnv) reconcileCR(t *testing.T, crName string) { + t.Helper() + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: crName}} + if _, err := e.crController.Reconcile(context.Background(), req); err != nil { + t.Fatalf("CR reconcile: %v", err) + } +} + +func (e *intgEnv) reconcileReservation(t *testing.T, resName string) { + t.Helper() + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: resName}} + if _, err := e.resController.Reconcile(context.Background(), req); err != nil { + t.Fatalf("reservation reconcile %s: %v", resName, err) + } +} + +func (e *intgEnv) listChildReservations(t *testing.T, crName string) []v1alpha1.Reservation { + t.Helper() + var list v1alpha1.ReservationList + if err := e.k8sClient.List(context.Background(), &list, client.MatchingLabels{ + v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource, + }); err != nil { + t.Fatalf("list reservations: %v", err) + } + prefix := crName + "-" + var children []v1alpha1.Reservation + for _, r := range list.Items { + if strings.HasPrefix(r.Name, prefix) { + children = append(children, r) + } + } + return children +} + +func (e *intgEnv) getCR(t *testing.T, name string) v1alpha1.CommittedResource { + t.Helper() + var cr v1alpha1.CommittedResource + if err := e.k8sClient.Get(context.Background(), types.NamespacedName{Name: name}, &cr); err != nil { + t.Fatalf("get CR %s: %v", name, err) + } + return cr +} + +// reconcileChildReservations runs the reservation controller twice on every child Reservation +// for crName (first reconcile sets TargetHost, second sets Ready=True), then re-reconciles +// the CR so it can observe the placement outcomes. +func (e *intgEnv) reconcileChildReservations(t *testing.T, crName string) { + t.Helper() + for _, res := range e.listChildReservations(t, crName) { + e.reconcileReservation(t, res.Name) // calls scheduler → sets TargetHost + e.reconcileReservation(t, res.Name) // syncs TargetHost to Status → Ready=True + } + e.reconcileCR(t, crName) +} + // ============================================================================ // Reconcile driver // ============================================================================ @@ -616,3 +679,471 @@ func intgExistingReservation(name, commitmentUUID string) *v1alpha1.Reservation }, } } + +// ============================================================================ +// Imperative lifecycle tests +// ============================================================================ + +// TestCRLifecycle covers multi-step state transitions that require imperative +// mid-test patches and cannot be expressed as a purely declarative table. 
+func TestCRLifecycle(t *testing.T) { + t.Run("planned→confirmed: child Reservations created and placed", func(t *testing.T) { + env := newDefaultIntgEnv(t) + defer env.close() + + cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusPlanned) + if err := env.k8sClient.Create(context.Background(), cr); err != nil { + t.Fatalf("create CR: %v", err) + } + + // Reconcile as planned: finalizer added, no Reservations. + env.reconcileCR(t, cr.Name) + env.reconcileCR(t, cr.Name) + if got := env.listChildReservations(t, cr.Name); len(got) != 0 { + t.Fatalf("planned: expected 0 reservations, got %d", len(got)) + } + crState := env.getCR(t, cr.Name) + cond := meta.FindStatusCondition(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) + if cond == nil || cond.Reason != "Planned" { + t.Errorf("planned: expected Reason=Planned, got %v", cond) + } + + // Transition to confirmed. + patch := client.MergeFrom(crState.DeepCopy()) + crState.Spec.State = v1alpha1.CommitmentStatusConfirmed + if err := env.k8sClient.Patch(context.Background(), &crState, patch); err != nil { + t.Fatalf("patch state to confirmed: %v", err) + } + env.reconcileCR(t, cr.Name) + + children := env.listChildReservations(t, cr.Name) + if len(children) != 1 { + t.Fatalf("confirmed: expected 1 reservation, got %d", len(children)) + } + env.reconcileChildReservations(t, cr.Name) + + crState = env.getCR(t, cr.Name) + if !meta.IsStatusConditionTrue(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) { + t.Errorf("confirmed: expected Ready=True") + } + }) + + t.Run("confirmed→expired: child Reservations deleted, CR marked inactive", func(t *testing.T) { + env := newDefaultIntgEnv(t) + defer env.close() + + cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) + if err := env.k8sClient.Create(context.Background(), cr); err != nil { + t.Fatalf("create CR: %v", err) + } + + // Bring to confirmed+Ready=True. + env.reconcileCR(t, cr.Name) // adds finalizer + env.reconcileCR(t, cr.Name) // creates Reservations + env.reconcileChildReservations(t, cr.Name) // places slots → Ready=True + + if got := env.listChildReservations(t, cr.Name); len(got) != 1 { + t.Fatalf("pre-expire: expected 1 reservation, got %d", len(got)) + } + + // Transition to expired. 
+ crState := env.getCR(t, cr.Name) + patch := client.MergeFrom(crState.DeepCopy()) + crState.Spec.State = v1alpha1.CommitmentStatusExpired + if err := env.k8sClient.Patch(context.Background(), &crState, patch); err != nil { + t.Fatalf("patch state to expired: %v", err) + } + env.reconcileCR(t, cr.Name) + + if got := env.listChildReservations(t, cr.Name); len(got) != 0 { + t.Errorf("expired: expected 0 reservations, got %d", len(got)) + } + crState = env.getCR(t, cr.Name) + cond := meta.FindStatusCondition(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) + if cond == nil || cond.Status != metav1.ConditionFalse { + t.Errorf("expired: expected Ready=False, got %v", cond) + } + if cond != nil && cond.Reason != string(v1alpha1.CommitmentStatusExpired) { + t.Errorf("expired: expected Reason=%s, got %s", v1alpha1.CommitmentStatusExpired, cond.Reason) + } + }) + + t.Run("reservation placement: two reconciles set TargetHost then Ready=True", func(t *testing.T) { + env := newDefaultIntgEnv(t) + defer env.close() + + cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) + if err := env.k8sClient.Create(context.Background(), cr); err != nil { + t.Fatalf("create CR: %v", err) + } + + env.reconcileCR(t, cr.Name) + env.reconcileCR(t, cr.Name) + + children := env.listChildReservations(t, cr.Name) + if len(children) != 1 { + t.Fatalf("expected 1 child reservation, got %d", len(children)) + } + child := children[0] + + // First reconcile: scheduler call → TargetHost written to Spec. + env.reconcileReservation(t, child.Name) + var afterFirst v1alpha1.Reservation + if err := env.k8sClient.Get(context.Background(), types.NamespacedName{Name: child.Name}, &afterFirst); err != nil { + t.Fatalf("get reservation after first reconcile: %v", err) + } + if afterFirst.Spec.TargetHost == "" { + t.Fatalf("expected TargetHost set after first reservation reconcile") + } + + // Second reconcile: TargetHost synced to Status, Ready=True. + env.reconcileReservation(t, child.Name) + var afterSecond v1alpha1.Reservation + if err := env.k8sClient.Get(context.Background(), types.NamespacedName{Name: child.Name}, &afterSecond); err != nil { + t.Fatalf("get reservation after second reconcile: %v", err) + } + if !meta.IsStatusConditionTrue(afterSecond.Status.Conditions, v1alpha1.ReservationConditionReady) { + t.Errorf("expected reservation Ready=True after placement, got %v", afterSecond.Status.Conditions) + } + if afterSecond.Status.Host != "host-1" { + t.Errorf("expected Status.Host=host-1, got %q", afterSecond.Status.Host) + } + }) + + t.Run("deletion: finalizer removed, child Reservations cleaned up", func(t *testing.T) { + env := newDefaultIntgEnv(t) + defer env.close() + + cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed) + if err := env.k8sClient.Create(context.Background(), cr); err != nil { + t.Fatalf("create CR: %v", err) + } + + // Pre-create a child Reservation to verify it gets cleaned up on deletion. + // newTestCommittedResource pre-populates the finalizer, so Delete() immediately sets DeletionTimestamp. 
+        child := &v1alpha1.Reservation{
+            ObjectMeta: metav1.ObjectMeta{
+                Name: "my-cr-0",
+                Labels: map[string]string{
+                    v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource,
+                },
+            },
+            Spec: v1alpha1.ReservationSpec{
+                Type: v1alpha1.ReservationTypeCommittedResource,
+                CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{
+                    CommitmentUUID: "test-uuid-1234",
+                },
+            },
+        }
+        if err := env.k8sClient.Create(context.Background(), child); err != nil {
+            t.Fatalf("create child reservation: %v", err)
+        }
+
+        crState := env.getCR(t, cr.Name)
+        if err := env.k8sClient.Delete(context.Background(), &crState); err != nil {
+            t.Fatalf("delete CR: %v", err)
+        }
+        env.reconcileCR(t, cr.Name)
+
+        if got := env.listChildReservations(t, cr.Name); len(got) != 0 {
+            t.Errorf("post-deletion: expected 0 reservations, got %d", len(got))
+        }
+        var final v1alpha1.CommittedResource
+        err := env.k8sClient.Get(context.Background(), types.NamespacedName{Name: cr.Name}, &final)
+        if client.IgnoreNotFound(err) != nil {
+            t.Fatalf("unexpected error after deletion: %v", err)
+        }
+        if err == nil {
+            for _, f := range final.Finalizers {
+                if f == crFinalizer {
+                    t.Errorf("finalizer not removed after deletion reconcile")
+                }
+            }
+        }
+    })
+
+    t.Run("confirmed→superseded: child Reservations deleted, CR marked inactive", func(t *testing.T) {
+        env := newDefaultIntgEnv(t)
+        defer env.close()
+
+        cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed)
+        if err := env.k8sClient.Create(context.Background(), cr); err != nil {
+            t.Fatalf("create CR: %v", err)
+        }
+
+        env.reconcileCR(t, cr.Name)
+        env.reconcileCR(t, cr.Name)
+        env.reconcileChildReservations(t, cr.Name)
+
+        if got := env.listChildReservations(t, cr.Name); len(got) != 1 {
+            t.Fatalf("pre-supersede: expected 1 reservation, got %d", len(got))
+        }
+
+        crState := env.getCR(t, cr.Name)
+        patch := client.MergeFrom(crState.DeepCopy())
+        crState.Spec.State = v1alpha1.CommitmentStatusSuperseded
+        if err := env.k8sClient.Patch(context.Background(), &crState, patch); err != nil {
+            t.Fatalf("patch state to superseded: %v", err)
+        }
+        env.reconcileCR(t, cr.Name)
+
+        if got := env.listChildReservations(t, cr.Name); len(got) != 0 {
+            t.Errorf("superseded: expected 0 reservations, got %d", len(got))
+        }
+        crState = env.getCR(t, cr.Name)
+        cond := meta.FindStatusCondition(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady)
+        if cond == nil || cond.Status != metav1.ConditionFalse {
+            t.Errorf("superseded: expected Ready=False, got %v", cond)
+        }
+        if cond != nil && cond.Reason != string(v1alpha1.CommitmentStatusSuperseded) {
+            t.Errorf("superseded: expected Reason=%s, got %s", v1alpha1.CommitmentStatusSuperseded, cond.Reason)
+        }
+    })
+
+    t.Run("idempotency: extra reconciles after Accepted do not create extra slots", func(t *testing.T) {
+        env := newDefaultIntgEnv(t)
+        defer env.close()
+
+        cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed)
+        if err := env.k8sClient.Create(context.Background(), cr); err != nil {
+            t.Fatalf("create CR: %v", err)
+        }
+
+        env.reconcileCR(t, cr.Name)
+        env.reconcileCR(t, cr.Name)
+        env.reconcileChildReservations(t, cr.Name)
+
+        if got := env.listChildReservations(t, cr.Name); len(got) != 1 {
+            t.Fatalf("pre-idempotency check: expected 1 reservation, got %d", len(got))
+        }
+
+        env.reconcileCR(t, cr.Name)
+        env.reconcileCR(t, cr.Name)
+
+        if got := env.listChildReservations(t, cr.Name); len(got) != 1 {
+            t.Errorf("idempotency: expected 1 reservation after extra reconciles, got %d", len(got))
+        }
+        crState := env.getCR(t, cr.Name)
+        if !meta.IsStatusConditionTrue(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) {
+            t.Errorf("idempotency: expected CR to remain Ready=True after extra reconciles")
+        }
+    })
+
+    t.Run("AllowRejection=false: stays Reserving when scheduler rejects", func(t *testing.T) {
+        env := newIntgEnv(t, []client.Object{newTestFlavorKnowledge(), intgHypervisor("host-1")}, intgRejectScheduler)
+        defer env.close()
+
+        cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed)
+        // AllowRejection stays false (the default), so placement failure must requeue, not reject.
+        if err := env.k8sClient.Create(context.Background(), cr); err != nil {
+            t.Fatalf("create CR: %v", err)
+        }
+
+        ctx := context.Background()
+        crReq := ctrl.Request{NamespacedName: types.NamespacedName{Name: cr.Name}}
+        for range 3 {
+            env.crController.Reconcile(ctx, crReq) //nolint:errcheck
+            var resList v1alpha1.ReservationList
+            env.k8sClient.List(ctx, &resList, client.MatchingLabels{ //nolint:errcheck
+                v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource,
+            })
+            for _, res := range resList.Items {
+                resReq := ctrl.Request{NamespacedName: types.NamespacedName{Name: res.Name}}
+                env.resController.Reconcile(ctx, resReq) //nolint:errcheck
+                env.resController.Reconcile(ctx, resReq) //nolint:errcheck
+            }
+            env.crController.Reconcile(ctx, crReq) //nolint:errcheck
+        }
+
+        var final v1alpha1.CommittedResource
+        if err := env.k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name}, &final); err != nil {
+            t.Fatalf("get CR: %v", err)
+        }
+        cond := meta.FindStatusCondition(final.Status.Conditions, v1alpha1.CommittedResourceConditionReady)
+        if cond == nil {
+            t.Fatalf("no Ready condition")
+        }
+        if cond.Reason == v1alpha1.CommittedResourceReasonRejected {
+            t.Errorf("AllowRejection=false: CR must not transition to Rejected, got Reason=%s", cond.Reason)
+        }
+        if cond.Reason != v1alpha1.CommittedResourceReasonReserving {
+            t.Errorf("AllowRejection=false: expected Reason=Reserving, got %s", cond.Reason)
+        }
+    })
+
+    t.Run("externally deleted child Reservation is recreated by CR controller", func(t *testing.T) {
+        env := newDefaultIntgEnv(t)
+        defer env.close()
+
+        cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed)
+        if err := env.k8sClient.Create(context.Background(), cr); err != nil {
+            t.Fatalf("create CR: %v", err)
+        }
+
+        env.reconcileCR(t, cr.Name)
+        env.reconcileCR(t, cr.Name)
+        env.reconcileChildReservations(t, cr.Name)
+
+        children := env.listChildReservations(t, cr.Name)
+        if len(children) != 1 {
+            t.Fatalf("expected 1 child reservation before deletion, got %d", len(children))
+        }
+
+        // Simulate out-of-band deletion of the slot.
+        child := children[0]
+        if err := env.k8sClient.Delete(context.Background(), &child); err != nil {
+            t.Fatalf("delete child reservation: %v", err)
+        }
+
+        // CR controller detects the missing slot and recreates it.
+        env.reconcileCR(t, cr.Name)
+        // Place the new slot.
+        env.reconcileChildReservations(t, cr.Name)
+        // CR controller observes Ready=True on the recreated slot.
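+        // (The parent's Ready condition is only refreshed by its own reconcile, hence this final pass.)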
+        env.reconcileCR(t, cr.Name)
+
+        if got := env.listChildReservations(t, cr.Name); len(got) != 1 {
+            t.Errorf("expected 1 reservation after recreation, got %d", len(got))
+        }
+        crState := env.getCR(t, cr.Name)
+        if !meta.IsStatusConditionTrue(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) {
+            t.Errorf("expected CR to be Ready=True after slot recreation")
+        }
+    })
+
+    t.Run("AcceptedAt: set when CR accepted", func(t *testing.T) {
+        env := newDefaultIntgEnv(t)
+        defer env.close()
+
+        cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed)
+        if err := env.k8sClient.Create(context.Background(), cr); err != nil {
+            t.Fatalf("create CR: %v", err)
+        }
+
+        env.reconcileCR(t, cr.Name)
+        env.reconcileCR(t, cr.Name)
+        env.reconcileChildReservations(t, cr.Name)
+
+        crState := env.getCR(t, cr.Name)
+        if !meta.IsStatusConditionTrue(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) {
+            t.Fatalf("expected CR to be Ready=True")
+        }
+        if crState.Status.AcceptedAt == nil {
+            t.Errorf("expected AcceptedAt to be set on acceptance")
+        }
+        if crState.Status.AcceptedAmount == nil {
+            t.Errorf("expected AcceptedAmount to be set on acceptance")
+        } else if crState.Status.AcceptedAmount.Cmp(resource.MustParse("4Gi")) != 0 {
+            t.Errorf("AcceptedAmount: want 4Gi, got %s", crState.Status.AcceptedAmount.String())
+        }
+    })
+
+    t.Run("resize failure: rolls back to AcceptedAmount, prior slot preserved", func(t *testing.T) {
+        // Scheduler: accepts the first placement call (initial 4 GiB slot), rejects all subsequent.
+        objects := []client.Object{newTestFlavorKnowledge(), intgHypervisor("host-1")}
+        env := newIntgEnv(t, objects, intgAcceptFirstScheduler(1))
+        defer env.close()
+
+        cr := intgCRAllowRejection("my-cr", "uuid-resize-0001", v1alpha1.CommitmentStatusConfirmed)
+        if err := env.k8sClient.Create(context.Background(), cr); err != nil {
+            t.Fatalf("create CR: %v", err)
+        }
+
+        // Phase 1: accept at 4 GiB (1 slot). Uses 1 scheduler call.
+        intgDriveToTerminal(t, env, []string{cr.Name})
+        var crState v1alpha1.CommittedResource
+        if err := env.k8sClient.Get(context.Background(), types.NamespacedName{Name: cr.Name}, &crState); err != nil {
+            t.Fatalf("get CR: %v", err)
+        }
+        if !meta.IsStatusConditionTrue(crState.Status.Conditions, v1alpha1.CommittedResourceConditionReady) {
+            t.Fatalf("phase 1: expected CR to be Ready=True after initial placement")
+        }
+        if crState.Status.AcceptedAmount == nil || crState.Status.AcceptedAmount.Cmp(resource.MustParse("4Gi")) != 0 {
+            t.Fatalf("phase 1: AcceptedAmount must be 4Gi, got %v", crState.Status.AcceptedAmount)
+        }
+
+        // Phase 2: resize to 8 GiB (needs 2 slots). Scheduler has no more accepts.
+        patch := client.MergeFrom(crState.DeepCopy())
+        crState.Spec.Amount = resource.MustParse("8Gi")
+        if err := env.k8sClient.Patch(context.Background(), &crState, patch); err != nil {
+            t.Fatalf("patch CR to 8Gi: %v", err)
+        }
+
+        ctx := context.Background()
+        crReq := ctrl.Request{NamespacedName: types.NamespacedName{Name: cr.Name}}
+
+        // CR controller: applyReservationState bumps gen on existing slot, creates 2nd slot.
+        env.crController.Reconcile(ctx, crReq) //nolint:errcheck
+        // Reservation controller: existing slot echoes new ParentGeneration (no scheduler call);
+        // new slot calls scheduler → rejected.
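+        // (8Gi at 4Gi per slot ⇒ 2 slots; only the new second slot needs a fresh placement.)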
+        var resList v1alpha1.ReservationList
+        env.k8sClient.List(ctx, &resList, client.MatchingLabels{ //nolint:errcheck
+            v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource,
+        })
+        for _, res := range resList.Items {
+            resReq := ctrl.Request{NamespacedName: types.NamespacedName{Name: res.Name}}
+            env.resController.Reconcile(ctx, resReq) //nolint:errcheck
+            env.resController.Reconcile(ctx, resReq) //nolint:errcheck
+        }
+        // CR controller: detects 2nd slot Ready=False → rollbackToAccepted (keeps 1 slot) → Rejected.
+        env.crController.Reconcile(ctx, crReq) //nolint:errcheck
+
+        // Rollback must preserve 1 slot (matching AcceptedAmount=4Gi), not delete all.
+        var finalList v1alpha1.ReservationList
+        if err := env.k8sClient.List(ctx, &finalList, client.MatchingLabels{
+            v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource,
+        }); err != nil {
+            t.Fatalf("list reservations: %v", err)
+        }
+        if len(finalList.Items) != 1 {
+            t.Errorf("resize rollback: want 1 slot (AcceptedAmount), got %d", len(finalList.Items))
+        }
+        intgAssertCRCondition(t, env.k8sClient, []string{cr.Name}, metav1.ConditionFalse, v1alpha1.CommittedResourceReasonRejected)
+    })
+
+    t.Run("AllowRejection=false: eventually accepted after scheduler starts accepting", func(t *testing.T) {
+        // Scheduler rejects the first 2 calls (one per reservation controller reconcile pair),
+        // then accepts all subsequent. AllowRejection=false means the CR controller retries rather
+        // than rejecting, so the CR must eventually reach Accepted once the scheduler cooperates.
+        objects := []client.Object{newTestFlavorKnowledge(), intgHypervisor("host-1")}
+        env := newIntgEnv(t, objects, intgRejectFirstScheduler(2))
+        defer env.close()
+
+        cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed)
+        // AllowRejection stays false (default), so placement failure must requeue, not reject.
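+        // With intgRejectFirstScheduler(2), the first loop iteration should absorb both
+        // rejections, leaving a later iteration free to place successfully.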
+        if err := env.k8sClient.Create(context.Background(), cr); err != nil {
+            t.Fatalf("create CR: %v", err)
+        }
+
+        ctx := context.Background()
+        crReq := ctrl.Request{NamespacedName: types.NamespacedName{Name: cr.Name}}
+        for range 3 {
+            env.crController.Reconcile(ctx, crReq) //nolint:errcheck
+            var resList v1alpha1.ReservationList
+            env.k8sClient.List(ctx, &resList, client.MatchingLabels{ //nolint:errcheck
+                v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource,
+            })
+            for _, res := range resList.Items {
+                resReq := ctrl.Request{NamespacedName: types.NamespacedName{Name: res.Name}}
+                env.resController.Reconcile(ctx, resReq) //nolint:errcheck
+                env.resController.Reconcile(ctx, resReq) //nolint:errcheck
+            }
+            env.crController.Reconcile(ctx, crReq) //nolint:errcheck
+        }
+
+        var final v1alpha1.CommittedResource
+        if err := env.k8sClient.Get(ctx, types.NamespacedName{Name: cr.Name}, &final); err != nil {
+            t.Fatalf("get CR: %v", err)
+        }
+        cond := meta.FindStatusCondition(final.Status.Conditions, v1alpha1.CommittedResourceConditionReady)
+        if cond == nil {
+            t.Fatalf("no Ready condition after retries")
+        }
+        if cond.Reason == v1alpha1.CommittedResourceReasonRejected {
+            t.Errorf("AllowRejection=false: CR must not be Rejected, got Reason=%s", cond.Reason)
+        }
+        if cond.Status != metav1.ConditionTrue || cond.Reason != v1alpha1.CommittedResourceReasonAccepted {
+            t.Errorf("AllowRejection=false: expected Ready=True/Accepted after retries, got Ready=%s/Reason=%s", cond.Status, cond.Reason)
+        }
+    })
+}
diff --git a/internal/scheduling/reservations/commitments/reservation_controller.go b/internal/scheduling/reservations/commitments/reservation_controller.go
index 96d86aeb2..b65842c60 100644
--- a/internal/scheduling/reservations/commitments/reservation_controller.go
+++ b/internal/scheduling/reservations/commitments/reservation_controller.go
@@ -73,6 +73,7 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr
     } else {
         ctx = WithNewGlobalRequestID(ctx)
     }
+    ctx = reservations.WithRequestID(ctx, req.Name)
     logger := LoggerFromContext(ctx).WithValues("component", "controller", "reservation", req.Name)

     // filter for CR reservations
diff --git a/internal/scheduling/reservations/commitments/reservation_manager.go b/internal/scheduling/reservations/commitments/reservation_manager.go
index d1fa28fda..6d70bcd20 100644
--- a/internal/scheduling/reservations/commitments/reservation_manager.go
+++ b/internal/scheduling/reservations/commitments/reservation_manager.go
@@ -172,8 +172,7 @@ func (m *ReservationManager) ApplyCommitmentState(
     // Phase 5 (CREATE): Create new reservations (capacity increased)
     for deltaMemoryBytes > 0 {
-        // Need to create new reservation slots, always prefer largest flavor within the group
-        // TODO more sophisticated flavor selection, especially with flavors of different cpu/memory ratio
+        // Select the largest flavor that fits the remaining delta (flavors sorted descending by memory).
         reservation := m.newReservation(desiredState, nextSlotIndex, deltaMemoryBytes, flavorGroup, creator)
         result.TouchedReservations = append(result.TouchedReservations, *reservation)
         memValue := reservation.Spec.Resources[hv1.ResourceMemory]
@@ -283,7 +282,8 @@ func (m *ReservationManager) newReservation(
     }
     name := fmt.Sprintf("%s%d", namePrefix, slotIndex)
-    // Select first flavor that fits remaining memory (flavors sorted descending by size)
+    // Select largest flavor that fits remaining memory (flavors sorted descending by memory then vCPUs).
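+    // Example: with flavors of 32Gi, 16Gi, and 8Gi and a remaining delta of 24Gi,
+    // the 16Gi flavor is chosen here and 8Gi is left for the next slot.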
+    // This works for both fixed and varying CPU:RAM ratio groups.
     flavorInGroup := flavorGroup.Flavors[len(flavorGroup.Flavors)-1] // default to smallest
     memoryBytes := deltaMemoryBytes
     cpus := int64(flavorInGroup.VCPUs) //nolint:gosec // VCPUs from flavor specs, realistically bounded
diff --git a/internal/scheduling/reservations/commitments/reservation_manager_test.go b/internal/scheduling/reservations/commitments/reservation_manager_test.go
index b512fc9b5..bb2fdaf52 100644
--- a/internal/scheduling/reservations/commitments/reservation_manager_test.go
+++ b/internal/scheduling/reservations/commitments/reservation_manager_test.go
@@ -342,3 +342,86 @@ func TestNewReservation_SelectsAppropriateFlavor(t *testing.T) {
         })
     }
 }
+
+// variableRatioFlavorGroup returns a flavor group with varying CPU:RAM ratios (GP-style).
+// Flavors are sorted descending by memory then vCPUs, matching the knowledge extractor order.
+func variableRatioFlavorGroup() compute.FlavorGroupFeature {
+    minRatio := uint64(2048) // MiB/vCPU
+    maxRatio := uint64(8192) // MiB/vCPU
+    return compute.FlavorGroupFeature{
+        Name: "gp-group",
+        Flavors: []compute.FlavorInGroup{
+            {Name: "c4_m32", VCPUs: 4, MemoryMB: 32768, DiskGB: 100}, // 8 GiB/vCPU
+            {Name: "c8_m16", VCPUs: 8, MemoryMB: 16384, DiskGB: 50},  // 2 GiB/vCPU
+            {Name: "c4_m8", VCPUs: 4, MemoryMB: 8192, DiskGB: 25},    // 2 GiB/vCPU
+        },
+        SmallestFlavor:  compute.FlavorInGroup{Name: "c4_m8", VCPUs: 4, MemoryMB: 8192, DiskGB: 25},
+        LargestFlavor:   compute.FlavorInGroup{Name: "c4_m32", VCPUs: 4, MemoryMB: 32768, DiskGB: 100},
+        RamCoreRatioMin: &minRatio,
+        RamCoreRatioMax: &maxRatio,
+    }
+}
+
+func TestNewReservation_VariableRatioGroup_SelectsLargestByMemory(t *testing.T) {
+    // For GP (variable CPU:RAM ratio) groups, flavor selection is driven by memory
+    // descending, not by ratio. The largest flavor fitting the delta is always chosen.
+    manager := &ReservationManager{}
+    fg := variableRatioFlavorGroup()
+
+    tests := []struct {
+        name          string
+        deltaMemoryMB int64
+        wantFlavor    string
+        wantCores     int64
+    }{
+        {
+            name:          "delta fits c4_m32: picks largest by memory",
+            deltaMemoryMB: 32768,
+            wantFlavor:    "c4_m32",
+            wantCores:     4,
+        },
+        {
+            name:          "delta larger than all: picks largest (c4_m32)",
+            deltaMemoryMB: 65536,
+            wantFlavor:    "c4_m32",
+            wantCores:     4,
+        },
+        {
+            name:          "delta between c4_m32 and c8_m16: picks c8_m16",
+            deltaMemoryMB: 24576, // 24 GiB — c8_m16 (16 GiB) fits, c4_m32 (32 GiB) doesn't
+            wantFlavor:    "c8_m16",
+            wantCores:     8,
+        },
+        {
+            name:          "delta equals c8_m16: picks c8_m16 (exact fit, largest that fits)",
+            deltaMemoryMB: 16384,
+            wantFlavor:    "c8_m16",
+            wantCores:     8,
+        },
+        {
+            name:          "delta fits only c4_m8: picks smallest",
+            deltaMemoryMB: 8192,
+            wantFlavor:    "c4_m8",
+            wantCores:     4,
+        },
+    }
+
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            deltaBytes := tt.deltaMemoryMB * 1024 * 1024
+            state := &CommitmentState{
+                CommitmentUUID:  "test-uuid",
+                ProjectID:       "project-1",
+                FlavorGroupName: "gp-group",
+            }
+            res := manager.newReservation(state, 0, deltaBytes, fg, "test")
+            if got := res.Spec.CommittedResourceReservation.ResourceName; got != tt.wantFlavor {
+                t.Errorf("flavor: want %s, got %s", tt.wantFlavor, got)
+            }
+            cpuQty := res.Spec.Resources[hv1.ResourceCPU]
+            if got := cpuQty.Value(); got != tt.wantCores {
+                t.Errorf("cores: want %d, got %d", tt.wantCores, got)
+            }
+        })
+    }
+}