From 39f1dd8541c2d6aee401e99eb85090b97130fb1a Mon Sep 17 00:00:00 2001
From: Marius Svechla
Date: Fri, 15 Nov 2024 14:46:06 +0100
Subject: [PATCH] fix: add autoscaling for tempo-distributed metrics-generator

This adds autoscaling via hpa and keda for tempo-distributed
metrics-generator. Implementation is analogous to the already existing
autoscaling options for the compactor.

Both the HorizontalPodAutoscaler and the keda ScaledObject target the
workload kind configured via `metricsGenerator.kind`, so the scale target
always matches the resource that is actually rendered for the
metrics-generator.

Signed-off-by: Marius Svechla
---
 charts/tempo-distributed/Chart.yaml           |  2 +-
 charts/tempo-distributed/README.md            | 12 ++++-
 .../templates/metrics-generator/hpa.yaml      | 47 +++++++++++++++++++
 .../metrics-generator/keda-scaled-object.yaml | 26 ++++++++++
 charts/tempo-distributed/values.yaml          | 30 ++++++++++++
 5 files changed, 115 insertions(+), 2 deletions(-)
 create mode 100644 charts/tempo-distributed/templates/metrics-generator/hpa.yaml
 create mode 100644 charts/tempo-distributed/templates/metrics-generator/keda-scaled-object.yaml

diff --git a/charts/tempo-distributed/Chart.yaml b/charts/tempo-distributed/Chart.yaml
index b46aa73790..9f429ef908 100644
--- a/charts/tempo-distributed/Chart.yaml
+++ b/charts/tempo-distributed/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
 name: tempo-distributed
 description: Grafana Tempo in MicroService mode
 type: application
-version: 1.22.0
+version: 1.23.0
 appVersion: 2.6.0
 engine: gotpl
 home: https://grafana.com/docs/tempo/latest/
diff --git a/charts/tempo-distributed/README.md b/charts/tempo-distributed/README.md
index d9d847dfe6..95ee18ceae 100755
--- a/charts/tempo-distributed/README.md
+++ b/charts/tempo-distributed/README.md
@@ -1,6 +1,6 @@
 # tempo-distributed
 
-![Version: 1.22.0](https://img.shields.io/badge/Version-1.22.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.6.0](https://img.shields.io/badge/AppVersion-2.6.0-informational?style=flat-square)
+![Version: 1.23.0](https://img.shields.io/badge/Version-1.23.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.6.0](https://img.shields.io/badge/AppVersion-2.6.0-informational?style=flat-square)
 
 Grafana Tempo in MicroService mode
 
@@ -638,6 +638,16 @@ The memcached default args are removed and should be provided manually. The sett
 | metricsGenerator.annotations | object | `{}` | Annotations for the metrics-generator StatefulSet |
 | metricsGenerator.appProtocol | object | `{"grpc":null}` | Adds the appProtocol field to the metricsGenerator service. This allows metricsGenerator to work with istio protocol selection. |
 | metricsGenerator.appProtocol.grpc | string | `nil` | Set the optional grpc service protocol. Ex: "grpc", "http2" or "https" |
+| metricsGenerator.autoscaling | object | `{"enabled":false,"hpa":{"behavior":{},"enabled":false,"targetCPUUtilizationPercentage":100,"targetMemoryUtilizationPercentage":null},"keda":{"enabled":false,"triggers":[]},"maxReplicas":3,"minReplicas":1}` | Autoscaling configurations |
+| metricsGenerator.autoscaling.enabled | bool | `false` | Enable autoscaling for the metrics-generator |
+| metricsGenerator.autoscaling.hpa | object | `{"behavior":{},"enabled":false,"targetCPUUtilizationPercentage":100,"targetMemoryUtilizationPercentage":null}` | Autoscaling via HPA object |
+| metricsGenerator.autoscaling.hpa.behavior | object | `{}` | Autoscaling behavior configuration for the metrics-generator |
+| metricsGenerator.autoscaling.hpa.targetCPUUtilizationPercentage | int | `100` | Target CPU utilisation percentage for the metrics-generator |
+| metricsGenerator.autoscaling.hpa.targetMemoryUtilizationPercentage | string | `nil` | Target memory utilisation percentage for the metrics-generator |
+| metricsGenerator.autoscaling.keda | object | `{"enabled":false,"triggers":[]}` | Autoscaling via keda/ScaledObject |
+| metricsGenerator.autoscaling.keda.triggers | list | `[]` | List of autoscaling triggers for the metrics-generator |
+| metricsGenerator.autoscaling.maxReplicas | int | `3` | Maximum autoscaling replicas for the metrics-generator |
+| metricsGenerator.autoscaling.minReplicas | int | `1` | Minimum autoscaling replicas for the metrics-generator |
 | metricsGenerator.config | object | `{"metrics_ingestion_time_range_slack":"30s","processor":{"service_graphs":{"dimensions":[],"histogram_buckets":[0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8],"max_items":10000,"wait":"10s","workers":10},"span_metrics":{"dimensions":[],"histogram_buckets":[0.002,0.004,0.008,0.016,0.032,0.064,0.128,0.256,0.512,1.02,2.05,4.1]}},"registry":{"collection_interval":"15s","external_labels":{},"stale_duration":"15m"},"storage":{"path":"/var/tempo/wal","remote_write":[],"remote_write_add_org_id_header":true,"remote_write_flush_deadline":"1m","wal":null},"traces_storage":{"path":"/var/tempo/traces"}}` | More information on configuration: https://grafana.com/docs/tempo/latest/configuration/#metrics-generator |
 | metricsGenerator.config.processor.service_graphs | object | `{"dimensions":[],"histogram_buckets":[0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8],"max_items":10000,"wait":"10s","workers":10}` | For processors to be enabled and generate metrics, pass the names of the processors to overrides.metrics_generator_processors value like [service-graphs, span-metrics] |
 | metricsGenerator.config.processor.service_graphs.dimensions | list | `[]` | resource and span attributes and are added to the metrics if present. |
diff --git a/charts/tempo-distributed/templates/metrics-generator/hpa.yaml b/charts/tempo-distributed/templates/metrics-generator/hpa.yaml
new file mode 100644
index 0000000000..b12134830d
--- /dev/null
+++ b/charts/tempo-distributed/templates/metrics-generator/hpa.yaml
@@ -0,0 +1,47 @@
+{{- if and .Values.metricsGenerator.autoscaling.enabled .Values.metricsGenerator.autoscaling.hpa.enabled }}
+{{- $apiVersion := include "tempo.hpa.apiVersion" . -}}
+{{ $dict := dict "ctx" . "component" "metrics-generator" }}
+apiVersion: {{ $apiVersion }}
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "tempo.resourceName" $dict }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "tempo.labels" $dict | nindent 4 }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: {{ .Values.metricsGenerator.kind }}
+    name: {{ include "tempo.resourceName" $dict }}
+  minReplicas: {{ .Values.metricsGenerator.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.metricsGenerator.autoscaling.maxReplicas }}
+  {{- with .Values.metricsGenerator.autoscaling.hpa.behavior }}
+  behavior:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  metrics:
+  {{- with .Values.metricsGenerator.autoscaling.hpa.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        {{- if (eq $apiVersion "autoscaling/v2") }}
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+        {{- else }}
+        targetAverageUtilization: {{ . }}
+        {{- end }}
+  {{- end }}
+  {{- with .Values.metricsGenerator.autoscaling.hpa.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        {{- if (eq $apiVersion "autoscaling/v2") }}
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+        {{- else }}
+        targetAverageUtilization: {{ . }}
+        {{- end }}
+  {{- end }}
+{{- end }}
diff --git a/charts/tempo-distributed/templates/metrics-generator/keda-scaled-object.yaml b/charts/tempo-distributed/templates/metrics-generator/keda-scaled-object.yaml
new file mode 100644
index 0000000000..6eb53b4a91
--- /dev/null
+++ b/charts/tempo-distributed/templates/metrics-generator/keda-scaled-object.yaml
@@ -0,0 +1,26 @@
+{{- if and .Values.metricsGenerator.autoscaling.enabled .Values.metricsGenerator.autoscaling.keda.enabled }}
+{{ $dict := dict "ctx" . "component" "metrics-generator" }}
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ include "tempo.resourceName" $dict }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "tempo.labels" $dict | nindent 4 }}
+spec:
+  minReplicaCount: {{ .Values.metricsGenerator.autoscaling.minReplicas }}
+  maxReplicaCount: {{ .Values.metricsGenerator.autoscaling.maxReplicas }}
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: {{ .Values.metricsGenerator.kind }}
+    name: {{ include "tempo.resourceName" $dict }}
+  triggers:
+  {{- range .Values.metricsGenerator.autoscaling.keda.triggers }}
+  - type: {{ .type | quote }}
+    metadata:
+      serverAddress: {{ .metadata.serverAddress }}
+      threshold: {{ .metadata.threshold | quote }}
+      query: |
+        {{- .metadata.query | nindent 8 }}
+  {{- end }}
+{{- end }}
diff --git a/charts/tempo-distributed/values.yaml b/charts/tempo-distributed/values.yaml
index fb890d7036..b94b8e4555 100755
--- a/charts/tempo-distributed/values.yaml
+++ b/charts/tempo-distributed/values.yaml
@@ -313,6 +313,36 @@ metricsGenerator:
     repository: null
     # -- Docker image tag for the metrics-generator image. Overrides `tempo.image.tag`
     tag: null
+  # -- Autoscaling configurations
+  autoscaling:
+    # -- Enable autoscaling for the metrics-generator
+    enabled: false
+    # -- Minimum autoscaling replicas for the metrics-generator
+    minReplicas: 1
+    # -- Maximum autoscaling replicas for the metrics-generator
+    maxReplicas: 3
+    # -- Autoscaling via HPA object
+    hpa:
+      enabled: false
+      # -- Autoscaling behavior configuration for the metrics-generator
+      behavior: {}
+      # -- Target CPU utilisation percentage for the metrics-generator
+      targetCPUUtilizationPercentage: 100
+      # -- Target memory utilisation percentage for the metrics-generator
+      targetMemoryUtilizationPercentage:
+    # -- Autoscaling via keda/ScaledObject
+    keda:
+      # requires https://keda.sh/
+      enabled: false
+      # -- List of autoscaling triggers for the metrics-generator
+      triggers: []
+      # - type: prometheus
+      #   metadata:
+      #     serverAddress: "http://:9090"
+      #     threshold: "250"
+      #     query: |-
+      #       sum(prometheus_remote_storage_shards_desired{job="default/metrics-generator"} /
+      #       prometheus_remote_storage_shards_max{job="default/metrics-generator"})by(job)
   # -- The name of the PriorityClass for metrics-generator pods
   priorityClassName: null
   # -- Labels for metrics-generator pods