From 9128c3530614bc5934307d7aafd4525683334692 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:59:14 +0100 Subject: [PATCH] more rules Record the etcd unary gRPC start rate and the average idle-CPU rate as recording rules, switch the ACM dashboards to the recorded series (still filtered by the $cluster dashboard variable and aggregated so the result keeps no labels), and replace the raw metrics with the recorded ones in the scrape-config series list. Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- .../nexus/acm/dash-acm-nexus-optimization-overview.yaml | 4 ++-- .../grafana/nexus/acm/dash-k8s-compute-resources-cluster.yaml | 2 +- .../manifests/base/grafana/nexus/acm/dash-k8s-etcd.yaml | 2 +- .../manifests/base/grafana/nexus/acm/prometheus-rule.yaml | 4 ++++ .../manifests/base/grafana/nexus/acm/scrape-config.yaml | 4 ++-- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/dash-acm-nexus-optimization-overview.yaml b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/dash-acm-nexus-optimization-overview.yaml index 7fe555a58..3639270f1 100644 --- a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/dash-acm-nexus-optimization-overview.yaml +++ b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/dash-acm-nexus-optimization-overview.yaml @@ -113,7 +113,7 @@ data: "targets": [ { "exemplar": true, - "expr": "(sum(cluster:kube_pod_container_resource_requests:cpu:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{cluster=\"$cluster\", resource=\"cpu\"})) - (1 - avg(rate(node_cpu_seconds_total{mode=\"idle\",cluster=\"$cluster\"}[$__rate_interval])))", + "expr": "(sum(cluster:kube_pod_container_resource_requests:cpu:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{cluster=\"$cluster\", resource=\"cpu\"})) - (1 - avg(node_cpu_seconds_total:mode_idle:avg_rate5m{cluster=\"$cluster\"}))", "format": "time_series", "instant": true, "interval": "", @@ -372,7 +372,7 @@ data: "pluginVersion": "8.5.20", "targets": [ { - "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__rate_interval]))", + "expr": "1 - 
avg(node_cpu_seconds_total:mode_idle:avg_rate5m{cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 1, diff --git a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/dash-k8s-compute-resources-cluster.yaml b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/dash-k8s-compute-resources-cluster.yaml index 61f6a3ab0..6ac463f3d 100644 --- a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/dash-k8s-compute-resources-cluster.yaml +++ b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/dash-k8s-compute-resources-cluster.yaml @@ -115,7 +115,7 @@ data: "pluginVersion": "8.5.20", "targets": [ { - "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__rate_interval]))", + "expr": "1 - avg(node_cpu_seconds_total:mode_idle:avg_rate5m{cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 1, diff --git a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/dash-k8s-etcd.yaml b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/dash-k8s-etcd.yaml index 57c09882d..492889727 100644 --- a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/dash-k8s-etcd.yaml +++ b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/dash-k8s-etcd.yaml @@ -151,7 +151,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_started_total{cluster=\"$cluster\",job=\"etcd\",grpc_type=\"unary\"}[$__rate_interval]))", + "expr": "sum(grpc_server_started_total:etcd_unary:sum_rate{cluster=\"$cluster\"})", "format": "time_series", "interval": "", "intervalFactor": 2, diff --git a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/prometheus-rule.yaml b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/prometheus-rule.yaml index 3d15cc8f0..039bbaccd 100644 --- a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/prometheus-rule.yaml +++ 
b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/prometheus-rule.yaml @@ -194,3 +194,7 @@ spec: record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate - expr: histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job="apiserver"}[5m])) by (instance, name, le)) record: workqueue_queue_duration_seconds_bucket:apiserver:histogram_quantile_99 + - expr: sum(rate(grpc_server_started_total{job="etcd",grpc_type="unary"}[5m])) + record: grpc_server_started_total:etcd_unary:sum_rate + - expr: avg(rate(node_cpu_seconds_total{mode="idle"}[5m])) + record: node_cpu_seconds_total:mode_idle:avg_rate5m diff --git a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/scrape-config.yaml b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/scrape-config.yaml index 535f1a780..46ed5040e 100644 --- a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/scrape-config.yaml +++ b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/scrape-config.yaml @@ -48,7 +48,7 @@ spec: - '{__name__="etcd_server_proposals_failed_total"}' - '{__name__="etcd_server_proposals_pending"}' - '{__name__="go_goroutines",job="apiserver"}' - - '{__name__="grpc_server_started_total"}' + - '{__name__="grpc_server_started_total:etcd_unary:sum_rate"}' - '{__name__="instance:node_cpu_utilisation:rate1m"}' - '{__name__="instance:node_load1_per_cpu:ratio"}' - '{__name__="instance:node_memory_utilisation:ratio"}' @@ -68,7 +68,7 @@ spec: - '{__name__="kube_resourcequota"}' - '{__name__="namespace_workload_pod:kube_pod_owner:relabel"}' - '{__name__="namespace_workload_pod:kube_pod_owner:relabel:avg"}' - - '{__name__="node_cpu_seconds_total"}' + - '{__name__="node_cpu_seconds_total:mode_idle:avg_rate5m"}' - '{__name__="node_filesystem_avail_bytes"}' - '{__name__="node_filesystem_size_bytes"}' - '{__name__="node_namespace_pod_container:container_cpu_usage_seconds_total:sum"}'