Skip to content

Commit

Permalink
feat(prometheus): Removing prometheus recording rules
Browse files Browse the repository at this point in the history
  • Loading branch information
rnishtala-sumo committed Aug 14, 2023
1 parent 206beda commit 13977bf
Show file tree
Hide file tree
Showing 5 changed files with 2 additions and 418 deletions.
1 change: 1 addition & 0 deletions .changelog/3211.changed.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
feat(prometheus): Removing prometheus recording rules
1 change: 0 additions & 1 deletion deploy/helm/sumologic/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,6 @@ The following table lists the configurable parameters of the Sumo Logic chart an
| `kube-prometheus-stack.prometheus-node-exporter.nodeSelector` | Node selector for prometheus node exporter. [See docs/best-practices.md for more information.](/docs/best-practices.md) | `{}` |
| `kube-prometheus-stack.kube-state-metrics.nodeSelector` | Node selector for kube-state-metrics. [See docs/best-practices.md for more information.](/docs/best-practices.md) | `{}` |
| `kube-prometheus-stack.kube-state-metrics.image.tag` | Tag for kube-state-metrics Docker image. | `v2.7.0` |
| `kube-prometheus-stack.additionalPrometheusRulesMap` | Custom recording or alerting rules to be deployed into the cluster | See [values.yaml] |
| `kube-prometheus-stack.commonLabels` | Labels to apply to all Kube Prometheus Stack resources | `{}` |
| `kube-prometheus-stack.coreDns.serviceMonitor.interval` | Core DNS metrics scrape interval. If not set, the Prometheus default scrape interval is used. | `Nil` |
| `kube-prometheus-stack.coreDns.serviceMonitor.metricRelabelings` | Core DNS MetricRelabelConfigs | See [values.yaml] |
Expand Down
192 changes: 0 additions & 192 deletions deploy/helm/sumologic/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1551,198 +1551,6 @@ kube-prometheus-stack:
prometheusOperator: false
windows: false

## k8s pre-1.14 prometheus recording rules
##
## One rule group (`node-pre-1.14.rules`) of cluster-wide and per-node recording rules.
## Naming pattern used below: records starting with ":" aggregate over everything,
## records starting with "node:" keep a `node` label.
##
## Most per-node rules obtain the `node` label by joining node-exporter series with
## `node_namespace_pod:kube_pod_info:` on (namespace, pod).
##
## NOTE(review): `node_namespace_pod:kube_pod_info:` and `node:node_num_cpu:sum` are consumed
## here but not recorded in this group - presumably they come from another rule group; verify.
##
## Every rate()/irate() in this group uses a [1m] range.
additionalPrometheusRulesMap:
  pre-1.14-node-rules:
    groups:
      - name: node-pre-1.14.rules
        rules:
          ## Pod info, counted per node and summed.
          - expr: sum(min(kube_pod_info) by (node))
            record: ":kube_pod_info_node_count:"

          ## CPU utilisation: 1 - idle rate (cluster-wide, then per node).
          - expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]))
            record: ":node_cpu_utilisation:avg1m"
          - expr: |-
              1 - avg by (node) (
                rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])
              * on (namespace, pod) group_left(node)
                node_namespace_pod:kube_pod_info:)
            record: node:node_cpu_utilisation:avg1m

          ## Memory: "available" is MemFree + Cached + Buffers.
          - expr: |-
              1 -
              sum(
                node_memory_MemFree_bytes{job="node-exporter"} +
                node_memory_Cached_bytes{job="node-exporter"} +
                node_memory_Buffers_bytes{job="node-exporter"}
              )
              /
              sum(node_memory_MemTotal_bytes{job="node-exporter"})
            record: ":node_memory_utilisation:"
          - expr: |-
              sum by (node) (
                (
                  node_memory_MemFree_bytes{job="node-exporter"} +
                  node_memory_Cached_bytes{job="node-exporter"} +
                  node_memory_Buffers_bytes{job="node-exporter"}
                )
                * on (namespace, pod) group_left(node)
                  node_namespace_pod:kube_pod_info:
              )
            record: node:node_memory_bytes_available:sum
          ## Reads node:node_memory_bytes_total:sum, which is recorded further down in this group.
          - expr: |-
              (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
              /
              node:node_memory_bytes_total:sum
            record: node:node_memory_utilisation:ratio
          - expr: |-
              1 -
              sum by (node) (
                (
                  node_memory_MemFree_bytes{job="node-exporter"} +
                  node_memory_Cached_bytes{job="node-exporter"} +
                  node_memory_Buffers_bytes{job="node-exporter"}
                )
                * on (namespace, pod) group_left(node)
                  node_namespace_pod:kube_pod_info:
              )
              /
              sum by (node) (
                node_memory_MemTotal_bytes{job="node-exporter"}
                * on (namespace, pod) group_left(node)
                  node_namespace_pod:kube_pod_info:
              )
            record: "node:node_memory_utilisation:"
          - expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum)
            record: "node:node_memory_utilisation_2:"

          ## Filesystem usage ratio: (size - avail) / size for the listed fstypes.
          - expr: |-
              max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
              - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
              / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
            record: "node:node_filesystem_usage:"

          ## Total memory per node (input to the ratio rules in this group).
          - expr: |-
              sum by (node) (
                node_memory_MemTotal_bytes{job="node-exporter"}
                * on (namespace, pod) group_left(node)
                  node_namespace_pod:kube_pod_info:
              )
            record: node:node_memory_bytes_total:sum

          ## Network: receive + transmit bytes (utilisation) and drops (saturation);
          ## devices matching "veth.+" are excluded.
          - expr: |-
              sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) +
              sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
            record: ":node_net_utilisation:sum_irate"
          - expr: |-
              sum by (node) (
                (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) +
                irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
              * on (namespace, pod) group_left(node)
                node_namespace_pod:kube_pod_info:
              )
            record: node:node_net_utilisation:sum_irate
          - expr: |-
              sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) +
              sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
            record: ":node_net_saturation:sum_irate"
          - expr: |-
              sum by (node) (
                (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) +
                irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
              * on (namespace, pod) group_left(node)
                node_namespace_pod:kube_pod_info:
              )
            record: node:node_net_saturation:sum_irate

          ## CPU saturation (cluster-wide): summed load1 divided by summed CPU count.
          - expr: |-
              sum(node_load1{job="node-exporter"})
              /
              sum(node:node_num_cpu:sum)
            record: ":node_cpu_saturation_load1:"

          ## Disk: weighted IO time (saturation) and IO time (utilisation) for the listed device patterns.
          - expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
            record: ":node_disk_saturation:avg_irate"
          - expr: |-
              avg by (node) (
                irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
              * on (namespace, pod) group_left(node)
                node_namespace_pod:kube_pod_info:
              )
            record: node:node_disk_saturation:avg_irate
          - expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
            record: ":node_disk_utilisation:avg_irate"
          - expr: |-
              avg by (node) (
                irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
              * on (namespace, pod) group_left(node)
                node_namespace_pod:kube_pod_info:
              )
            record: node:node_disk_utilisation:avg_irate

          ## Paging IO: (pgpgin + pgpgout) rate, scaled by 1e3.
          - expr: |-
              1e3 * sum(
                (rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
                + rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
              )
            record: ":node_memory_swap_io_bytes:sum_rate"
          - expr: |-
              1e3 * sum by (node) (
                (rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
                + rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
              * on (namespace, pod) group_left(node)
                node_namespace_pod:kube_pod_info:
              )
            record: node:node_memory_swap_io_bytes:sum_rate

          ## Per-node usage expressed against the cluster-wide total.
          - expr: |-
              node:node_cpu_utilisation:avg1m
              *
              node:node_num_cpu:sum
              /
              scalar(sum(node:node_num_cpu:sum))
            record: node:cluster_cpu_utilisation:ratio
          - expr: |-
              (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
              /
              scalar(sum(node:node_memory_bytes_total:sum))
            record: node:cluster_memory_utilisation:ratio

          ## CPU saturation per node: load1 divided by that node's CPU count.
          - expr: |-
              sum by (node) (
                node_load1{job="node-exporter"}
                * on (namespace, pod) group_left(node)
                  node_namespace_pod:kube_pod_info:
              )
              /
              node:node_num_cpu:sum
            record: "node:node_cpu_saturation_load1:"

          ## Filesystem available ratio: avail / size for the listed fstypes.
          - expr: |-
              max by (instance, namespace, pod, device) (
                node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
                /
                node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
              )
            record: "node:node_filesystem_avail:"

          ## Inodes on mountpoint "/": joined to kube_pod_info through a `host_ip` label that
          ## label_replace derives from `instance` using the regex "(.*):.*".
          - expr: |-
              max(
                max(
                  kube_pod_info{job="kube-state-metrics", host_ip!=""}
                ) by (node, host_ip)
                * on (host_ip) group_right (node)
                label_replace(
                  (
                    max(node_filesystem_files{job="node-exporter", mountpoint="/"})
                    by (instance)
                  ), "host_ip", "$1", "instance", "(.*):.*"
                )
              ) by (node)
            record: "node:node_inodes_total:"
          - expr: |-
              max(
                max(
                  kube_pod_info{job="kube-state-metrics", host_ip!=""}
                ) by (node, host_ip)
                * on (host_ip) group_right (node)
                label_replace(
                  (
                    max(node_filesystem_files_free{job="node-exporter", mountpoint="/"})
                    by (instance)
                  ), "host_ip", "$1", "instance", "(.*):.*"
                )
              ) by (node)
            record: "node:node_inodes_free:"
## NOTE: changing the serviceMonitor scrape interval to be >1m can cause metrics from recording
## rules to be missing, which results in empty panels in Sumo Logic Kubernetes apps.
kubeApiServer:
Expand Down
Loading

0 comments on commit 13977bf

Please sign in to comment.