From 6349b8557a06f8e4cd4298b4526ab29810182e4c Mon Sep 17 00:00:00 2001 From: Evan Nemerson Date: Mon, 9 Feb 2026 10:26:24 -0500 Subject: [PATCH] CP-37576: Use CloudZero fork of Alloy Switch the clustered mode Alloy deployment from the official Grafana Alloy container image to the CloudZero Alloy fork. The CloudZero Alloy binary is embedded directly in the agent Docker image, so the Alloy container now uses the same image as all other agent containers rather than pulling a separate image. Functional Change: Before: Clustered mode pulled and ran Alloy from a separate container image (docker.io/grafana/alloy:v1.11.3). After: Clustered mode runs the CloudZero Alloy binary embedded in the agent image. The Alloy container uses the agent image with /app/cloudzero-alloy as the default command. Users can override both the image and command via clusteredNode.image and clusteredNode.command to use a different Alloy image (e.g., the official Grafana Alloy). Solution: 1. Added a new Dockerfile stage that pulls from the CloudZero Alloy image (ghcr.io/evan-cz/alloy) and copies /bin/cloudzero-alloy into /app/ in the final image. 2. Updated the Alloy container in helm/templates/agent-deploy.yaml to use the agent image (components.agent.image) by default, with clusteredNode.image as an override layer via the compat parameter of generateImage. 3. Added a generateContainerCommand helper to _helpers.tpl with three-state logic: null uses the default command (/app/cloudzero-alloy), an empty array uses the image's default entrypoint, and a non-empty array uses the specified command. 4. Changed clusteredNode.image defaults in helm/values.yaml from docker.io/grafana/alloy:v1.11.3 to null/null so they fall back to components.agent.image. Added clusteredNode.command (default null). Updated values.schema.yaml accordingly. 5. Updated Helm tests (alloy_deployment_test.yaml, alloy_image_configuration_test.yaml) to validate the new image source and command configuration. 
Validation: - All 481 Helm unit tests pass across 64 test suites - Docker image built and pushed successfully for both linux/amd64 and linux/arm64 via `make package-debug` - Deployed to GKE cluster with `mode: clustered` --- app/functions/helmless/default-values.yaml | 31 +++++++++++++++---- docker/Dockerfile | 11 +++++-- helm/templates/_helpers.tpl | 20 ++++++++++++ helm/templates/agent-deploy.yaml | 6 ++-- helm/tests/alloy_deployment_test.yaml | 6 +++- .../tests/alloy_image_configuration_test.yaml | 27 +++++++--------- helm/values.schema.json | 10 ++++++ helm/values.schema.yaml | 13 +++++++- helm/values.yaml | 31 +++++++++++++++---- tests/helm/template/alloy.yaml | 11 ++++--- tests/helm/template/cert-manager.yaml | 5 +-- tests/helm/template/federated.yaml | 5 +-- tests/helm/template/istio.yaml | 5 +-- tests/helm/template/manifest.yaml | 5 +-- 14 files changed, 140 insertions(+), 46 deletions(-) diff --git a/app/functions/helmless/default-values.yaml b/app/functions/helmless/default-values.yaml index 890b431b..9a71cc82 100644 --- a/app/functions/helmless/default-values.yaml +++ b/app/functions/helmless/default-values.yaml @@ -304,9 +304,9 @@ components: # per node. This is equivalent to setting defaults.federation.enabled to true. # Uses Prometheus in server mode for metrics collection. # - # - "clustered": EXPERIMENTAL. Uses Grafana Alloy instead of Prometheus for - # metrics collection. Alloy provides better performance and native - # horizontal scalability. + # - "clustered": EXPERIMENTAL. Uses the embedded CloudZero Alloy binary for + # metrics collection instead of Prometheus. Alloy provides better + # performance and native horizontal scalability. # # - null means automatic mode (currently defaults to "agent"). mode: null @@ -406,10 +406,29 @@ components: # The agent clustered node component configuration. # Only applies when components.agent.mode is set to "clustered". clusteredNode: - # Container image configuration for Grafana Alloy. 
+ # Container image override for the Alloy container. + # + # By default, the CloudZero Alloy binary is embedded in the agent + # image and the Alloy container uses the same image as other agent + # containers (components.agent.image). Set repository/tag here only + # if you need to override the Alloy container image separately. image: - repository: docker.io/grafana/alloy - tag: v1.11.3 + repository: null + tag: null + # Command to run in the Alloy container. + # + # - null (default): Uses /app/cloudzero-alloy (CloudZero Alloy binary) + # - []: Uses the image's default entrypoint (for official Alloy image) + # - ["/custom/path"]: Uses the specified command + # + # To use the official Grafana Alloy image instead of the embedded binary: + # + # image: + # repository: docker.io/grafana/alloy + # tag: vX.Y.Z + # command: [] + # + command: null # Resource requirements and limits for the container. # diff --git a/docker/Dockerfile b/docker/Dockerfile index f188a752..34dd7346 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -5,7 +5,8 @@ ARG DEPLOY_IMAGE=scratch # 2. dependencies: Download Go modules (cached per platform) # 3. builder: Build Go binaries (cached per platform) # 4. certs: Extract certificates from distroless image -# 5. final: Minimal runtime image with compiled binaries +# 5. alloy: Extract Alloy binary from CloudZero Alloy image +# 6. 
final: Minimal runtime image with compiled binaries # Stage 1: Base tools installation FROM --platform=$BUILDPLATFORM golang:1.25.6-alpine AS base-tools @@ -69,7 +70,10 @@ RUN --mount=type=cache,target=/go/pkg/mod,id=gomod-$TARGETPLATFORM \ # Stage 4: Access current certs FROM gcr.io/distroless/static-debian12:debug@sha256:20d9c135406d8029d30d59eaaa9d62d2edcd4ec5915dbcda324243c40460e8df AS certs -# Stage 5: Final runtime image +# Stage 5: Extract Alloy binary from CloudZero Alloy image. NOTE(review): ':latest' is a mutable tag, so rebuilds may embed a different Alloy binary; consider pinning by digest as the certs stage does. +FROM ghcr.io/evan-cz/alloy:latest AS alloy + +# Stage 6: Final runtime image # Note: For debugging, you can temporarily change the image used for building by # passing in something like this to 'docker build': # @@ -113,6 +117,9 @@ COPY --from=builder /go/bin/cloudzero-helmless /app/cloudzero-helmless COPY --from=builder /go/bin/cloudzero-scout /app/cloudzero-scout COPY --from=builder /go/bin/cloudzero-certifik8s /app/cloudzero-certifik8s +# Copy the Alloy binary from the alloy stage +COPY --from=alloy /bin/cloudzero-alloy /app/cloudzero-alloy + # Allow the default ENTRYPOINT from busybox to be the default, # however run the app as the default command CMD ["/app/cloudzero-agent-validator", "-h"] diff --git a/helm/templates/_helpers.tpl b/helm/templates/_helpers.tpl index 6563e467..32025350 100644 --- a/helm/templates/_helpers.tpl +++ b/helm/templates/_helpers.tpl @@ -838,6 +838,26 @@ configuration will not overwrite values from the first configuration. 
{{- end -}} {{- end -}} +{{/* +Generate the container `command:` field from the dict keys `command` and `default`: +- command null/not set: emits the `default` array +- command is an empty array []: emits nothing (uses image's default entrypoint) +- any other command value (normally a non-empty array): emitted as given + +Usage: {{ include "cloudzero-agent.generateContainerCommand" (dict "command" .Values.components.agent.clusteredNode.command "default" (list "/app/cloudzero-alloy")) | nindent 10 }} +*/}} +{{- define "cloudzero-agent.generateContainerCommand" -}} +{{- $isEmptyArray := and (kindIs "slice" .command) (empty .command) -}} +{{- if not $isEmptyArray -}} +command: + {{- if kindIs "invalid" .command }} + {{- toYaml .default | nindent 2 }} + {{- else }} + {{- toYaml .command | nindent 2 }} + {{- end }} +{{- end -}} +{{- end -}} + {{/* Generate image configuration with defaults. */}} diff --git a/helm/templates/agent-deploy.yaml b/helm/templates/agent-deploy.yaml index 4660d1cf..0b7f5411 100644 --- a/helm/templates/agent-deploy.yaml +++ b/helm/templates/agent-deploy.yaml @@ -227,10 +227,10 @@ spec: {{- include "cloudzero-agent.apiKeyVolumeMount" . | nindent 12 }} {{- end }}{{/* End Prometheus container */}} {{- if eq (include "cloudzero-agent.Values.components.agent.mode" .) "clustered" }} - # Grafana Alloy container + # CloudZero Alloy container (binary embedded in agent image) - name: {{ template "cloudzero-agent.name" . }}-alloy - {{- $alloyImage := include "cloudzero-agent.agentCollectorImage" . 
| fromYaml }} - {{- include "cloudzero-agent.generateImage" (dict "defaults" .Values.defaults.image "image" $alloyImage) | nindent 10 }} + {{- include "cloudzero-agent.generateImage" (dict "defaults" .Values.defaults.image "image" .Values.components.agent.image "compat" .Values.components.agent.clusteredNode.image) | nindent 10 }} + {{- include "cloudzero-agent.generateContainerCommand" (dict "command" .Values.components.agent.clusteredNode.command "default" (list "/app/cloudzero-alloy")) | nindent 10 }} args: - run - /etc/alloy/alloy-config.river diff --git a/helm/tests/alloy_deployment_test.yaml b/helm/tests/alloy_deployment_test.yaml index ed62b5bb..077f6465 100644 --- a/helm/tests/alloy_deployment_test.yaml +++ b/helm/tests/alloy_deployment_test.yaml @@ -32,7 +32,7 @@ tests: template: templates/agent-deploy.yaml - matchRegex: path: spec.template.spec.containers[1].image - pattern: grafana/alloy + pattern: cloudzero-agent template: templates/agent-deploy.yaml - isNull: path: data["prometheus.yml"] @@ -84,6 +84,10 @@ tests: set: components.agent.mode: clustered asserts: + - contains: + path: spec.template.spec.containers[1].command + content: /app/cloudzero-alloy + template: templates/agent-deploy.yaml - contains: path: spec.template.spec.containers[1].args content: run diff --git a/helm/tests/alloy_image_configuration_test.yaml b/helm/tests/alloy_image_configuration_test.yaml index 53bc816a..34eb8f5a 100644 --- a/helm/tests/alloy_image_configuration_test.yaml +++ b/helm/tests/alloy_image_configuration_test.yaml @@ -2,36 +2,33 @@ suite: test Alloy image configuration templates: - templates/agent-deploy.yaml tests: - # Test default Alloy image - - it: should use default Alloy image from components.agent.clusteredNode.image + # Test that Alloy container uses the agent image (Alloy binary is embedded) + - it: should use the agent image for the Alloy container set: components.agent.mode: clustered asserts: - matchRegex: path: spec.template.spec.containers[1].image - 
pattern: docker.io/grafana/alloy:v1.11.3 + pattern: cloudzero-agent - # Test custom Alloy image - - it: should use custom Alloy image when specified + # Test custom agent image applies to Alloy container + - it: should use custom agent image when specified set: components.agent.mode: clustered - components.agent.clusteredNode.image: - repository: custom.registry.io/custom/alloy + components.agent.image: + repository: custom.registry.io/custom/agent tag: v2.0.0 asserts: - matchRegex: path: spec.template.spec.containers[1].image - pattern: custom.registry.io/custom/alloy:v2.0.0 + pattern: custom.registry.io/custom/agent:v2.0.0 - # Test that alloy image is only used for Alloy + # Test the Prometheus container image in agent mode. NOTE(review): no clusteredNode.image override is set here anymore, so isolation from Alloy image config is not exercised - it: should not affect Prometheus image set: components.agent.mode: agent components.agent.autoscaling: enabled: false - components.agent.clusteredNode.image: - repository: custom.registry.io/custom/alloy - tag: v2.0.0 asserts: - matchRegex: path: spec.template.spec.containers[1].image @@ -40,11 +37,11 @@ tests: path: spec.template.spec.containers[1].image pattern: alloy - # Test image pull policy from components.agent.clusteredNode.image - - it: should respect pullPolicy from components.agent.clusteredNode.image + # Test image pull policy from components.agent.image + - it: should respect pullPolicy from components.agent.image set: components.agent.mode: clustered - components.agent.clusteredNode.image: + components.agent.image: pullPolicy: Always asserts: - equal: diff --git a/helm/values.schema.json b/helm/values.schema.json index cdd5018d..a2f6e3ac 100644 --- a/helm/values.schema.json +++ b/helm/values.schema.json @@ -5856,6 +5856,16 @@ "clusteredNode": { "additionalProperties": false, "properties": { + "command": { + "oneOf": [ + { + "$ref": "#/$defs/io.k8s.api.core.v1.Container/properties/command" + }, + { + "type": "null" + } + ] + }, "image": { "$ref": "#/$defs/com.cloudzero.agent.image" }, diff --git 
a/helm/values.schema.yaml b/helm/values.schema.yaml index d328c874..9e939e90 100644 --- a/helm/values.schema.yaml +++ b/helm/values.schema.yaml @@ -666,8 +666,19 @@ properties: properties: image: description: | - Container image configuration for Grafana Alloy. + Container image override for the Alloy container. By default, the Alloy + container uses the agent image (components.agent.image) which embeds the + CloudZero Alloy binary. Set repository/tag here to override separately. $ref: "#/$defs/com.cloudzero.agent.image" + command: + description: | + Command to run in the Alloy container. + - null (default): Uses /app/cloudzero-alloy (CloudZero Alloy binary) + - [] (empty array): Uses the image's default entrypoint + - ["/custom/path"]: Uses the specified command + oneOf: + - $ref: "#/$defs/io.k8s.api.core.v1.Container/properties/command" + - type: "null" resources: description: | Resource requirements and limits for the Alloy container. diff --git a/helm/values.yaml b/helm/values.yaml index 6e08435b..491a4766 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -304,9 +304,9 @@ components: # per node. This is equivalent to setting defaults.federation.enabled to true. # Uses Prometheus in server mode for metrics collection. # - # - "clustered": EXPERIMENTAL. Uses Grafana Alloy instead of Prometheus for - # metrics collection. Alloy provides better performance and native - # horizontal scalability. + # - "clustered": EXPERIMENTAL. Uses the embedded CloudZero Alloy binary for + # metrics collection instead of Prometheus. Alloy provides better + # performance and native horizontal scalability. # # - null means automatic mode (currently defaults to "agent"). mode: null @@ -406,10 +406,29 @@ components: # The agent clustered node component configuration. # Only applies when components.agent.mode is set to "clustered". clusteredNode: - # Container image configuration for Grafana Alloy. + # Container image override for the Alloy container. 
+ # + # By default, the CloudZero Alloy binary is embedded in the agent + # image and the Alloy container uses the same image as other agent + # containers (components.agent.image). Set repository/tag here only + # if you need to override the Alloy container image separately. image: - repository: docker.io/grafana/alloy - tag: v1.11.3 + repository: null + tag: null + # Command to run in the Alloy container. + # + # - null (default): Uses /app/cloudzero-alloy (CloudZero Alloy binary) + # - []: Uses the image's default entrypoint (for official Alloy image) + # - ["/custom/path"]: Uses the specified command + # + # To use the official Grafana Alloy image instead of the embedded binary: + # + # image: + # repository: docker.io/grafana/alloy + # tag: vX.Y.Z + # command: [] + # + command: null # Resource requirements and limits for the container. # diff --git a/tests/helm/template/alloy.yaml b/tests/helm/template/alloy.yaml index 22092d5f..c296eb7a 100644 --- a/tests/helm/template/alloy.yaml +++ b/tests/helm/template/alloy.yaml @@ -966,9 +966,10 @@ data: annotations: {} autoscaling: null clusteredNode: + command: null image: - repository: docker.io/grafana/alloy - tag: v1.11.3 + repository: null + tag: null resources: limits: cpu: 100m @@ -2601,10 +2602,12 @@ spec: - name: config-volume mountPath: /etc/config readOnly: true - # Grafana Alloy container + # CloudZero Alloy container (binary embedded in agent image) - name: cloudzero-agent-alloy - image: "docker.io/grafana/alloy:v1.11.3" + image: "ghcr.io/cloudzero/cloudzero-agent/cloudzero-agent:1.2.9" imagePullPolicy: "IfNotPresent" + command: + - /app/cloudzero-alloy args: - run - /etc/alloy/alloy-config.river diff --git a/tests/helm/template/cert-manager.yaml b/tests/helm/template/cert-manager.yaml index 9ff067bb..c99a9391 100644 --- a/tests/helm/template/cert-manager.yaml +++ b/tests/helm/template/cert-manager.yaml @@ -883,9 +883,10 @@ data: annotations: {} autoscaling: null clusteredNode: + command: null image: - 
repository: docker.io/grafana/alloy - tag: v1.11.3 + repository: null + tag: null resources: limits: cpu: 100m diff --git a/tests/helm/template/federated.yaml b/tests/helm/template/federated.yaml index d6635ce2..80c4e415 100644 --- a/tests/helm/template/federated.yaml +++ b/tests/helm/template/federated.yaml @@ -971,9 +971,10 @@ data: annotations: {} autoscaling: null clusteredNode: + command: null image: - repository: docker.io/grafana/alloy - tag: v1.11.3 + repository: null + tag: null resources: limits: cpu: 100m diff --git a/tests/helm/template/istio.yaml b/tests/helm/template/istio.yaml index 84b4ab51..f3fb390d 100644 --- a/tests/helm/template/istio.yaml +++ b/tests/helm/template/istio.yaml @@ -898,9 +898,10 @@ data: annotations: {} autoscaling: null clusteredNode: + command: null image: - repository: docker.io/grafana/alloy - tag: v1.11.3 + repository: null + tag: null resources: limits: cpu: 100m diff --git a/tests/helm/template/manifest.yaml b/tests/helm/template/manifest.yaml index 0e2de51a..edcc5452 100644 --- a/tests/helm/template/manifest.yaml +++ b/tests/helm/template/manifest.yaml @@ -898,9 +898,10 @@ data: annotations: {} autoscaling: null clusteredNode: + command: null image: - repository: docker.io/grafana/alloy - tag: v1.11.3 + repository: null + tag: null resources: limits: cpu: 100m