From 39f1dd8541c2d6aee401e99eb85090b97130fb1a Mon Sep 17 00:00:00 2001
From: Marius Svechla <m.svechla@gmail.com>
Date: Fri, 15 Nov 2024 14:46:06 +0100
Subject: [PATCH] fix: add autoscaling for tempo-distributed metrics-generator

This adds autoscaling via HPA and KEDA for the tempo-distributed
metrics-generator. The implementation is analogous to the existing
autoscaling options for the compactor.

Signed-off-by: Marius Svechla <m.svechla@gmail.com>
---
 charts/tempo-distributed/Chart.yaml           |  2 +-
 charts/tempo-distributed/README.md            | 12 ++++-
 .../templates/metrics-generator/hpa.yaml      | 47 +++++++++++++++++++
 .../metrics-generator/keda-scaled-object.yaml | 26 ++++++++++
 charts/tempo-distributed/values.yaml          | 30 ++++++++++++
 5 files changed, 115 insertions(+), 2 deletions(-)
 create mode 100644 charts/tempo-distributed/templates/metrics-generator/hpa.yaml
 create mode 100644 charts/tempo-distributed/templates/metrics-generator/keda-scaled-object.yaml

diff --git a/charts/tempo-distributed/Chart.yaml b/charts/tempo-distributed/Chart.yaml
index b46aa73790..9f429ef908 100644
--- a/charts/tempo-distributed/Chart.yaml
+++ b/charts/tempo-distributed/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
 name: tempo-distributed
 description: Grafana Tempo in MicroService mode
 type: application
-version: 1.22.0
+version: 1.23.0
 appVersion: 2.6.0
 engine: gotpl
 home: https://grafana.com/docs/tempo/latest/
diff --git a/charts/tempo-distributed/README.md b/charts/tempo-distributed/README.md
index d9d847dfe6..95ee18ceae 100755
--- a/charts/tempo-distributed/README.md
+++ b/charts/tempo-distributed/README.md
@@ -1,6 +1,6 @@
 # tempo-distributed
 
-![Version: 1.22.0](https://img.shields.io/badge/Version-1.22.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.6.0](https://img.shields.io/badge/AppVersion-2.6.0-informational?style=flat-square)
+![Version: 1.23.0](https://img.shields.io/badge/Version-1.23.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.6.0](https://img.shields.io/badge/AppVersion-2.6.0-informational?style=flat-square)
 
 Grafana Tempo in MicroService mode
 
@@ -638,6 +638,16 @@ The memcached default args are removed and should be provided manually. The sett
 | metricsGenerator.annotations | object | `{}` | Annotations for the metrics-generator StatefulSet |
 | metricsGenerator.appProtocol | object | `{"grpc":null}` | Adds the appProtocol field to the metricsGenerator service. This allows metricsGenerator to work with istio protocol selection. |
 | metricsGenerator.appProtocol.grpc | string | `nil` | Set the optional grpc service protocol. Ex: "grpc", "http2" or "https" |
+| metricsGenerator.autoscaling | object | `{"enabled":false,"hpa":{"behavior":{},"enabled":false,"targetCPUUtilizationPercentage":100,"targetMemoryUtilizationPercentage":null},"keda":{"enabled":false,"triggers":[]},"maxReplicas":3,"minReplicas":1}` | Autoscaling configurations |
+| metricsGenerator.autoscaling.enabled | bool | `false` | Enable autoscaling for the metrics-generator |
+| metricsGenerator.autoscaling.hpa | object | `{"behavior":{},"enabled":false,"targetCPUUtilizationPercentage":100,"targetMemoryUtilizationPercentage":null}` | Autoscaling via HPA object |
+| metricsGenerator.autoscaling.hpa.behavior | object | `{}` | Autoscaling behavior configuration for the metrics-generator |
+| metricsGenerator.autoscaling.hpa.targetCPUUtilizationPercentage | int | `100` | Target CPU utilisation percentage for the metrics-generator |
+| metricsGenerator.autoscaling.hpa.targetMemoryUtilizationPercentage | string | `nil` | Target memory utilisation percentage for the metrics-generator |
+| metricsGenerator.autoscaling.keda | object | `{"enabled":false,"triggers":[]}` | Autoscaling via keda/ScaledObject |
+| metricsGenerator.autoscaling.keda.triggers | list | `[]` | List of autoscaling triggers for the metrics-generator |
+| metricsGenerator.autoscaling.maxReplicas | int | `3` | Maximum autoscaling replicas for the metrics-generator |
+| metricsGenerator.autoscaling.minReplicas | int | `1` | Minimum autoscaling replicas for the metrics-generator |
 | metricsGenerator.config | object | `{"metrics_ingestion_time_range_slack":"30s","processor":{"service_graphs":{"dimensions":[],"histogram_buckets":[0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8],"max_items":10000,"wait":"10s","workers":10},"span_metrics":{"dimensions":[],"histogram_buckets":[0.002,0.004,0.008,0.016,0.032,0.064,0.128,0.256,0.512,1.02,2.05,4.1]}},"registry":{"collection_interval":"15s","external_labels":{},"stale_duration":"15m"},"storage":{"path":"/var/tempo/wal","remote_write":[],"remote_write_add_org_id_header":true,"remote_write_flush_deadline":"1m","wal":null},"traces_storage":{"path":"/var/tempo/traces"}}` | More information on configuration: https://grafana.com/docs/tempo/latest/configuration/#metrics-generator |
 | metricsGenerator.config.processor.service_graphs | object | `{"dimensions":[],"histogram_buckets":[0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8],"max_items":10000,"wait":"10s","workers":10}` | For processors to be enabled and generate metrics, pass the names of the processors to overrides.metrics_generator_processors value like [service-graphs, span-metrics] |
 | metricsGenerator.config.processor.service_graphs.dimensions | list | `[]` | resource and span attributes and are added to the metrics if present. |
diff --git a/charts/tempo-distributed/templates/metrics-generator/hpa.yaml b/charts/tempo-distributed/templates/metrics-generator/hpa.yaml
new file mode 100644
index 0000000000..b12134830d
--- /dev/null
+++ b/charts/tempo-distributed/templates/metrics-generator/hpa.yaml
@@ -0,0 +1,47 @@
+{{- if and .Values.metricsGenerator.autoscaling.enabled .Values.metricsGenerator.autoscaling.hpa.enabled }}
+{{- $hpaApiVersion := include "tempo.hpa.apiVersion" . -}}
+{{ $component := dict "ctx" . "component" "metrics-generator" }}
+apiVersion: {{ $hpaApiVersion }}
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "tempo.resourceName" $component }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "tempo.labels" $component | nindent 4 }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: {{ .Values.metricsGenerator.kind }}
+    name: {{ include "tempo.resourceName" $component }}
+  minReplicas: {{ .Values.metricsGenerator.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.metricsGenerator.autoscaling.maxReplicas }}
+  {{- with .Values.metricsGenerator.autoscaling.hpa.behavior }}
+  behavior:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  metrics:
+  {{- with .Values.metricsGenerator.autoscaling.hpa.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        {{- if ne $hpaApiVersion "autoscaling/v2" }}
+        targetAverageUtilization: {{ . }}
+        {{- else }}
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+        {{- end }}
+  {{- end }}
+  {{- with .Values.metricsGenerator.autoscaling.hpa.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        {{- if ne $hpaApiVersion "autoscaling/v2" }}
+        targetAverageUtilization: {{ . }}
+        {{- else }}
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+        {{- end }}
+  {{- end }}
+{{- end }}
diff --git a/charts/tempo-distributed/templates/metrics-generator/keda-scaled-object.yaml b/charts/tempo-distributed/templates/metrics-generator/keda-scaled-object.yaml
new file mode 100644
index 0000000000..6eb53b4a91
--- /dev/null
+++ b/charts/tempo-distributed/templates/metrics-generator/keda-scaled-object.yaml
@@ -0,0 +1,26 @@
+{{- if and .Values.metricsGenerator.autoscaling.enabled .Values.metricsGenerator.autoscaling.keda.enabled }}
+{{ $dict := dict "ctx" . "component" "metrics-generator" }}
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ include "tempo.resourceName" $dict }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "tempo.labels" $dict | nindent 4 }}
+spec:
+  minReplicaCount: {{ .Values.metricsGenerator.autoscaling.minReplicas }}
+  maxReplicaCount: {{ .Values.metricsGenerator.autoscaling.maxReplicas }}
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: {{ .Values.metricsGenerator.kind }}
+    name: {{ include "tempo.resourceName" $dict }}
+  triggers:
+  {{- range .Values.metricsGenerator.autoscaling.keda.triggers }}
+  - type: {{ .type | quote }}
+    metadata:
+      serverAddress: {{ .metadata.serverAddress | quote }}
+      threshold: {{ .metadata.threshold | quote }}
+      query: |
+        {{- .metadata.query | nindent 8 }}
+  {{- end }}
+{{- end }}
diff --git a/charts/tempo-distributed/values.yaml b/charts/tempo-distributed/values.yaml
index fb890d7036..b94b8e4555 100755
--- a/charts/tempo-distributed/values.yaml
+++ b/charts/tempo-distributed/values.yaml
@@ -313,6 +313,36 @@ metricsGenerator:
     repository: null
     # -- Docker image tag for the metrics-generator image. Overrides `tempo.image.tag`
     tag: null
+  # -- Autoscaling configurations
+  autoscaling:
+    # -- Enable autoscaling for the metrics-generator
+    enabled: false
+    # -- Minimum autoscaling replicas for the metrics-generator
+    minReplicas: 1
+    # -- Maximum autoscaling replicas for the metrics-generator
+    maxReplicas: 3
+    # -- Autoscaling via HPA object
+    hpa:
+      enabled: false
+      # -- Autoscaling behavior configuration for the metrics-generator
+      behavior: {}
+      # -- Target CPU utilisation percentage for the metrics-generator
+      targetCPUUtilizationPercentage: 100
+      # -- Target memory utilisation percentage for the metrics-generator
+      targetMemoryUtilizationPercentage: null
+    # -- Autoscaling via keda/ScaledObject
+    keda:
+      # requires https://keda.sh/
+      enabled: false
+      # -- List of autoscaling triggers for the metrics-generator
+      triggers: []
+      # - type: prometheus
+      #   metadata:
+      #     serverAddress: "http://<prometheus-host>:9090"
+      #     threshold: "250"
+      #     query: |-
+      #       sum(prometheus_remote_storage_shards_desired{job="default/metrics-generator"} /
+      #       prometheus_remote_storage_shards_max{job="default/metrics-generator"})by(job)
   # -- The name of the PriorityClass for metrics-generator pods
   priorityClassName: null
   # -- Labels for metrics-generator pods