diff --git a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/prometheus-rule.yaml b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/prometheus-rule.yaml index 039bbaccd..99ffc58c9 100644 --- a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/prometheus-rule.yaml +++ b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/prometheus-rule.yaml @@ -27,31 +27,17 @@ spec: - expr: (histogram_quantile(0.99,sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver", verb!="WATCH"}[5m])) by (le, verb, instance))) record: apiserver_request_duration_seconds:histogram_quantile_99:instance - - expr: sum(kube_node_status_allocatable{resource="cpu"}) - record: cluster:cpu_allocatable:sum - - expr: sum(machine_cpu_cores) - record: cluster:cpu_cores:sum - expr: sum(machine_memory_bytes) record: cluster:machine_memory:sum - - expr: sum(kube_node_status_allocatable{resource="memory"}) - record: cluster:memory_allocatable:sum - - expr: 1 - sum(:node_memory_MemAvailable_bytes:sum) / cluster:memory_allocatable:sum - record: cluster:memory_utilized:ratio - expr: sum(sum(sum(kube_pod_container_resource_requests{resource="cpu",unit="core"}) by (pod,namespace,container) * on(pod,namespace) group_left(phase) max(kube_pod_status_phase{phase=~"Running|Pending|Unknown"} >0) by (pod,namespace,phase)) by (pod,namespace,phase)) record: cluster:kube_pod_container_resource_requests:cpu:sum - - expr: sum(cluster:kube_pod_container_resource_requests:cpu:sum) by (cluster) / sum(kube_node_status_allocatable{resource="cpu"}) by (cluster) - record: cluster:cpu_requested:ratio - expr: sum(sum(sum(kube_pod_container_resource_requests{resource="memory",unit="byte"}) by (pod,namespace,container) * on(pod,namespace) group_left(phase) max(kube_pod_status_phase{phase=~"Running|Pending|Unknown"} >0) by (pod,namespace,phase)) by (pod,namespace,phase)) record: cluster:kube_pod_container_resource_requests:memory:sum - - expr: sum(cluster:kube_pod_container_resource_requests:memory:sum) by (cluster) / sum(kube_node_status_allocatable{resource="memory"}) by (cluster) - record: cluster:memory_requested:ratio - - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) - record: cluster:node_cpu:sum_rate5m - - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) / count(sum(node_cpu_seconds_total) BY (instance, cpu)) record: cluster:node_cpu:ratio - expr: | @@ -109,28 +95,6 @@ spec: - expr: | rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m]) record: instance_device:node_disk_io_time_weighted_seconds:rate1m - - expr: |- - sum by (namespace, cluster) ( - sum by (namespace, pod, cluster) ( - max by (namespace, pod, container, cluster) ( - kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} - ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 - ) - ) - ) - record: namespace_memory:kube_pod_container_resource_limits:sum - - expr: |- - sum by (namespace, cluster) ( - sum by (namespace, pod, cluster) ( - max by (namespace, pod, container, cluster) ( - kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} - ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 - ) - ) - ) - record: namespace_memory:kube_pod_container_resource_requests:sum - expr: |- max by (cluster, namespace, workload, pod) ( label_replace( diff --git a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/scrape-config.yaml b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/scrape-config.yaml index 46ed5040e..d30a686fb 100644 --- a/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/scrape-config.yaml +++ b/operators/multiclusterobservability/manifests/base/grafana/nexus/acm/scrape-config.yaml @@ -19,14 +19,9 @@ spec: - '{__name__="active_streams_watch:grpc_server_handled_total:sum"}' - '{__name__="apiserver_request_duration_seconds:histogram_quantile_99"}' - '{__name__="apiserver_request_duration_seconds:histogram_quantile_99:instance"}' - - '{__name__="cluster:cpu_allocatable:sum"}' - - '{__name__="cluster:cpu_cores:sum"}' - - '{__name__="cluster:cpu_requested:ratio"}' - '{__name__="cluster:kube_pod_container_resource_requests:cpu:sum"}' - '{__name__="cluster:kube_pod_container_resource_requests:memory:sum"}' - '{__name__="cluster:machine_memory:sum"}' - - '{__name__="cluster:memory_requested:ratio"}' - - '{__name__="cluster:memory_utilized:ratio"}' - '{__name__="cluster:node_cpu:ratio"}' - '{__name__="container_cpu_cfs_periods_total"}' - '{__name__="container_cpu_cfs_throttled_periods_total"}'