Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/v1alpha1/flavor_group_capacity_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package v1alpha1

import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand Down Expand Up @@ -56,6 +57,10 @@ type FlavorGroupCapacityStatus struct {
// +kubebuilder:validation:Optional
CommittedCapacity int64 `json:"committedCapacity,omitempty"`

// TotalCapacity is the total capacity of all eligible hosts in an empty-datacenter scenario.
// +kubebuilder:validation:Optional
TotalCapacity map[string]resource.Quantity `json:"totalCapacity,omitempty"`

// TotalInstances is the total number of VM instances running on hypervisors in this AZ,
// derived from Hypervisor CRD Status.Instances (not filtered by flavor group).
// +kubebuilder:validation:Optional
Expand Down
7 changes: 7 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,16 @@ spec:
reconcile.
format: date-time
type: string
totalCapacity:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: TotalCapacity is the total capacity of all eligible hosts
in an empty-datacenter scenario.
type: object
totalInstances:
description: |-
TotalInstances is the total number of VM instances running on hypervisors in this AZ,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ type FlavorGroupFeature struct {
// The largest flavor in the group (used for reservation slot sizing)
LargestFlavor FlavorInGroup `json:"largestFlavor"`

// The smallest flavor in the group (used for CR size quantification)
// The smallest flavor in the group
SmallestFlavor FlavorInGroup `json:"smallestFlavor"`

// RAM-to-core ratio in MiB per vCPU (MemoryMB / VCPUs).
Expand Down
33 changes: 33 additions & 0 deletions internal/scheduling/reservations/capacity/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/google/uuid"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
Expand Down Expand Up @@ -172,10 +173,42 @@ func (c *Controller) reconcileOne(
committedCapacity = 0
}

// Compute TotalCapacity: for each flavor multiply slot count by its RAM/CPU,
// then take the max across all flavors independently for each resource.
// This reveals the most capacity because the flavor best matching the host's
// resource ratio saturates more resources and produces a higher product.
flavorSpecByName := make(map[string]compute.FlavorInGroup, len(groupData.Flavors))
for _, f := range groupData.Flavors {
flavorSpecByName[f.Name] = f
}
totalCapacity := make(map[string]resource.Quantity)
var maxMemBytes, maxCPUCores int64
for _, f := range newFlavors {
spec, ok := flavorSpecByName[f.FlavorName]
if !ok || f.TotalCapacityVMSlots <= 0 {
continue
}
memBytes := f.TotalCapacityVMSlots * int64(spec.MemoryMB) * 1024 * 1024 //nolint:gosec
cpuCores := f.TotalCapacityVMSlots * int64(spec.VCPUs) //nolint:gosec
if memBytes > maxMemBytes {
maxMemBytes = memBytes
}
if cpuCores > maxCPUCores {
maxCPUCores = cpuCores
}
}
if maxMemBytes > 0 {
totalCapacity["memory"] = *resource.NewQuantity(maxMemBytes, resource.BinarySI)
}
if maxCPUCores > 0 {
totalCapacity["cpu"] = *resource.NewQuantity(maxCPUCores, resource.DecimalSI)
}

patch := client.MergeFrom(existing.DeepCopy())
existing.Status.Flavors = newFlavors
existing.Status.TotalInstances = totalInstances
existing.Status.CommittedCapacity = committedCapacity
existing.Status.TotalCapacity = totalCapacity
existing.Status.LastReconcileAt = metav1.Now()

freshCondition := metav1.Condition{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,7 @@ ProcessLoop:
break ProcessLoop
}

flavorGroup, ok := flavorGroups[flavorGroupName]
if !ok {
if _, ok := flavorGroups[flavorGroupName]; !ok {
failedReason = "flavor group not found: " + flavorGroupName
rollback = true
break ProcessLoop
Expand Down Expand Up @@ -249,7 +248,7 @@ ProcessLoop:
}

stateDesired, err := commitments.FromChangeCommitmentTargetState(
commitment, string(projectID), domainID, flavorGroupName, flavorGroup, string(req.AZ))
commitment, string(projectID), domainID, flavorGroupName, string(req.AZ))
if err != nil {
failedReason = fmt.Sprintf("commitment %s: %s", commitment.UUID, err)
rollback = true
Expand Down
36 changes: 5 additions & 31 deletions internal/scheduling/reservations/commitments/api/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package api
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"strconv"
Expand All @@ -20,9 +19,6 @@ import (
liquid "github.com/sapcc/go-api-declarations/liquid"
)

// errInternalServiceInfo indicates an internal error while building service info (e.g., invalid unit configuration)
var errInternalServiceInfo = errors.New("internal error building service info")

// handles GET /commitments/v1/info requests from Limes:
// See: https://github.com/sapcc/go-api-declarations/blob/main/liquid/commitment.go
// See: https://pkg.go.dev/github.com/sapcc/go-api-declarations/liquid
Expand Down Expand Up @@ -54,16 +50,9 @@ func (api *HTTPAPI) HandleInfo(w http.ResponseWriter, r *http.Request) {
// Build info response
info, err := api.buildServiceInfo(ctx, logger)
if err != nil {
if errors.Is(err, errInternalServiceInfo) {
logger.Error(err, "internal error building service info")
statusCode = http.StatusInternalServerError
http.Error(w, "Internal server error: "+err.Error(), statusCode)
} else {
// Use Info level for expected conditions like knowledge not being ready yet
logger.Info("service info not available yet", "error", err.Error())
statusCode = http.StatusServiceUnavailable
http.Error(w, "Service temporarily unavailable: "+err.Error(), statusCode)
}
logger.Info("service info not available yet", "error", err.Error())
statusCode = http.StatusServiceUnavailable
http.Error(w, "Service temporarily unavailable: "+err.Error(), statusCode)
Comment on lines +53 to +55
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Keep the 503 body generic instead of echoing err.Error().

This exposes internal backend details through the public API response. The concrete failure should stay in logs; clients only need a stable 503 service temporarily unavailable message.

Proposed fix
 		logger.Info("service info not available yet", "error", err.Error())
 		statusCode = http.StatusServiceUnavailable
-		http.Error(w, "Service temporarily unavailable: "+err.Error(), statusCode)
+		http.Error(w, "service temporarily unavailable", statusCode)
 		api.recordInfoMetrics(statusCode, startTime)
 		return
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
logger.Info("service info not available yet", "error", err.Error())
statusCode = http.StatusServiceUnavailable
http.Error(w, "Service temporarily unavailable: "+err.Error(), statusCode)
logger.Info("service info not available yet", "error", err.Error())
statusCode = http.StatusServiceUnavailable
http.Error(w, "service temporarily unavailable", statusCode)
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@internal/scheduling/reservations/commitments/api/info.go` around lines 53 -
55, The response body currently echoes internal error text; change the
http.Error call in internal/scheduling/reservations/commitments/api/info.go so
it returns a generic message (e.g., "Service temporarily unavailable") instead
of concatenating err.Error(), keep statusCode = http.StatusServiceUnavailable,
and ensure the detailed error remains in logs (the existing logger.Info("service
info not available yet", "error", err.Error()) can stay or be promoted to
logger.Error if desired); update the http.Error invocation that references
statusCode and err.Error() to use the generic message only.

api.recordInfoMetrics(statusCode, startTime)
return
}
Expand Down Expand Up @@ -133,20 +122,8 @@ func (api *HTTPAPI) buildServiceInfo(ctx context.Context, logger logr.Logger) (l
attrsJSON = nil
}

// Validate memory is positive to avoid panic in MultiplyBy (which panics on factor=0)
if groupData.SmallestFlavor.MemoryMB == 0 {
return liquid.ServiceInfo{}, fmt.Errorf("%w: flavor group %q has invalid smallest flavor with memoryMB=0",
errInternalServiceInfo, groupName)
}

// === 1. RAM Resource ===
ramResourceName := liquid.ResourceName(commitments.ResourceNameRAM(groupName))
ramUnit, err := liquid.UnitMebibytes.MultiplyBy(groupData.SmallestFlavor.MemoryMB)
if err != nil {
// Note: This error only occurs on uint64 overflow, which is unrealistic for memory values
return liquid.ServiceInfo{}, fmt.Errorf("%w: failed to create unit for flavor group %q: %w",
errInternalServiceInfo, groupName, err)
}
// Determine topology: AZSeparatedTopology only for groups that accept commitments
// (AZSeparatedTopology means quota is also AZ-aware, required when HasQuota=true)
ramTopology := liquid.AZAwareTopology
Expand All @@ -155,11 +132,10 @@ func (api *HTTPAPI) buildServiceInfo(ctx context.Context, logger logr.Logger) (l
}
resources[ramResourceName] = liquid.ResourceInfo{
DisplayName: fmt.Sprintf(
"multiples of %d MiB (usable by: %s)",
groupData.SmallestFlavor.MemoryMB,
"GiB of RAM (usable by: %s)",
flavorListStr,
),
Unit: ramUnit,
Unit: liquid.UnitGibibytes,
Topology: ramTopology,
NeedsResourceDemand: false,
HasCapacity: resCfg.RAM.HasCapacity,
Expand Down Expand Up @@ -205,8 +181,6 @@ func (api *HTTPAPI) buildServiceInfo(ctx context.Context, logger logr.Logger) (l
"ramResource", ramResourceName,
"coresResource", coresResourceName,
"instancesResource", instancesResourceName,
"smallestFlavor", groupData.SmallestFlavor.Name,
"smallestRamMB", groupData.SmallestFlavor.MemoryMB,
"ramCoreRatio", groupData.RamCoreRatio)
}

Expand Down
15 changes: 7 additions & 8 deletions internal/scheduling/reservations/commitments/api/info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,15 @@ func TestHandleInfo_MethodNotAllowed(t *testing.T) {
}

func TestHandleInfo_InvalidFlavorMemory(t *testing.T) {
// Test that a 500 Internal Server Error is returned when a flavor group has invalid data.
//
// A flavor with memoryMB=0 is invalid and should trigger an HTTP 500 error.
// Such data could occur from a bug in the flavor groups extractor.
// Test that the info endpoint succeeds even when a flavor group has memoryMB=0.
// With the fixed GiB unit, we no longer reject zero-memory flavors at the info level;
// they result in zero capacity at the capacity reporting level instead.
scheme := runtime.NewScheme()
if err := v1alpha1.AddToScheme(scheme); err != nil {
t.Fatalf("failed to add scheme: %v", err)
}

// Create flavor group with memoryMB=0 (invalid data that could come from a buggy extractor)
// Create flavor group with memoryMB=0 (edge case from a buggy extractor)
features := []map[string]interface{}{
{
"name": "invalid_group",
Expand Down Expand Up @@ -132,9 +131,9 @@ func TestHandleInfo_InvalidFlavorMemory(t *testing.T) {
resp := w.Result()
defer resp.Body.Close()

// Should return 500 Internal Server Error when unit creation fails
if resp.StatusCode != http.StatusInternalServerError {
t.Errorf("expected status code %d (Internal Server Error), got %d", http.StatusInternalServerError, resp.StatusCode)
// Should return 200 OK — zero-memory flavor no longer causes an error
if resp.StatusCode != http.StatusOK {
t.Errorf("expected status code %d (OK), got %d", http.StatusOK, resp.StatusCode)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (api *HTTPAPI) HandleReportCapacity(w http.ResponseWriter, r *http.Request)
}

// Calculate capacity
calculator := commitments.NewCapacityCalculator(api.client)
calculator := commitments.NewCapacityCalculator(api.client, api.config)
report, err := calculator.CalculateCapacity(ctx, req)
if err != nil {
logger.Error(err, "failed to calculate capacity")
Expand Down
Loading
Loading