diff --git a/.changelog/3211.changed.txt b/.changelog/3211.changed.txt new file mode 100644 index 0000000000..815b5fafb9 --- /dev/null +++ b/.changelog/3211.changed.txt @@ -0,0 +1 @@ +feat(prometheus): Remove Prometheus recording rules \ No newline at end of file diff --git a/deploy/helm/sumologic/README.md b/deploy/helm/sumologic/README.md index d0e702d613..376a9d875b 100644 --- a/deploy/helm/sumologic/README.md +++ b/deploy/helm/sumologic/README.md @@ -378,7 +378,6 @@ The following table lists the configurable parameters of the Sumo Logic chart an | `kube-prometheus-stack.prometheus-node-exporter.nodeSelector` | Node selector for prometheus node exporter. [See docs/best-practices.md for more information.](/docs/best-practices.md) | `{}` | | `kube-prometheus-stack.kube-state-metrics.nodeSelector` | Node selector for kube-state-metrics. [See docs/best-practices.md for more information.](/docs/best-practices.md) | `{}` | | `kube-prometheus-stack.kube-state-metrics.image.tag` | Tag for kube-state-metrics Docker image. | `v2.7.0` | -| `kube-prometheus-stack.additionalPrometheusRulesMap` | Custom recording or alerting rules to be deployed into the cluster | See [values.yaml] | | `kube-prometheus-stack.commonLabels` | Labels to apply to all Kube Prometheus Stack resources | `{}` | | `kube-prometheus-stack.coreDns.serviceMonitor.interval` | Core DNS metrics scrape interval. If not set, the Prometheus default scrape interval is used. 
| `Nil` | | `kube-prometheus-stack.coreDns.serviceMonitor.metricRelabelings` | Core DNS MetricRelabelConfigs | See [values.yaml] | diff --git a/deploy/helm/sumologic/values.yaml b/deploy/helm/sumologic/values.yaml index c14c79fdcb..5a12b676c3 100644 --- a/deploy/helm/sumologic/values.yaml +++ b/deploy/helm/sumologic/values.yaml @@ -1551,198 +1551,6 @@ kube-prometheus-stack: prometheusOperator: false windows: false - ## k8s pre-1.14 prometheus recording rules - additionalPrometheusRulesMap: - pre-1.14-node-rules: - groups: - - name: node-pre-1.14.rules - rules: - - expr: sum(min(kube_pod_info) by (node)) - record: ":kube_pod_info_node_count:" - - expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])) - record: :node_cpu_utilisation:avg1m - - expr: |- - 1 - avg by (node) ( - rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info:) - record: node:node_cpu_utilisation:avg1m - - expr: |- - 1 - - sum( - node_memory_MemFree_bytes{job="node-exporter"} + - node_memory_Cached_bytes{job="node-exporter"} + - node_memory_Buffers_bytes{job="node-exporter"} - ) - / - sum(node_memory_MemTotal_bytes{job="node-exporter"}) - record: ":node_memory_utilisation:" - - expr: |- - sum by (node) ( - ( - node_memory_MemFree_bytes{job="node-exporter"} + - node_memory_Cached_bytes{job="node-exporter"} + - node_memory_Buffers_bytes{job="node-exporter"} - ) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_memory_bytes_available:sum - - expr: |- - (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) - / - node:node_memory_bytes_total:sum - record: node:node_memory_utilisation:ratio - - expr: |- - 1 - - sum by (node) ( - ( - node_memory_MemFree_bytes{job="node-exporter"} + - node_memory_Cached_bytes{job="node-exporter"} + - node_memory_Buffers_bytes{job="node-exporter"} - ) - * on (namespace, pod) group_left(node) 
- node_namespace_pod:kube_pod_info: - ) - / - sum by (node) ( - node_memory_MemTotal_bytes{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: "node:node_memory_utilisation:" - - expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum) - record: "node:node_memory_utilisation_2:" - - expr: |- - max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} - - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) - / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) - record: "node:node_filesystem_usage:" - - expr: |- - sum by (node) ( - node_memory_MemTotal_bytes{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_memory_bytes_total:sum - - expr: |- - sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + - sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) - record: :node_net_utilisation:sum_irate - - expr: |- - sum by (node) ( - (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) + - irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_net_utilisation:sum_irate - - expr: |- - sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + - sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) - record: :node_net_saturation:sum_irate - - expr: |- - sum by (node) ( - (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) + - irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_net_saturation:sum_irate - - 
expr: |- - sum(node_load1{job="node-exporter"}) - / - sum(node:node_num_cpu:sum) - record: ":node_cpu_saturation_load1:" - - expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) - record: :node_disk_saturation:avg_irate - - expr: |- - avg by (node) ( - irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_disk_saturation:avg_irate - - expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) - record: :node_disk_utilisation:avg_irate - - expr: |- - avg by (node) ( - irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_disk_utilisation:avg_irate - - expr: |- - 1e3 * sum( - (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) - + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) - ) - record: :node_memory_swap_io_bytes:sum_rate - - expr: |- - 1e3 * sum by (node) ( - (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) - + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_memory_swap_io_bytes:sum_rate - - expr: |- - node:node_cpu_utilisation:avg1m - * - node:node_num_cpu:sum - / - scalar(sum(node:node_num_cpu:sum)) - record: node:cluster_cpu_utilisation:ratio - - expr: |- - (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) - / - scalar(sum(node:node_memory_bytes_total:sum)) - record: node:cluster_memory_utilisation:ratio - - expr: |- - sum by (node) ( - node_load1{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - / - node:node_num_cpu:sum - 
record: "node:node_cpu_saturation_load1:" - - expr: |- - max by (instance, namespace, pod, device) ( - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} - / - node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} - ) - record: "node:node_filesystem_avail:" - - expr: |- - max( - max( - kube_pod_info{job="kube-state-metrics", host_ip!=""} - ) by (node, host_ip) - * on (host_ip) group_right (node) - label_replace( - ( - max(node_filesystem_files{job="node-exporter", mountpoint="/"}) - by (instance) - ), "host_ip", "$1", "instance", "(.*):.*" - ) - ) by (node) - record: "node:node_inodes_total:" - - expr: |- - max( - max( - kube_pod_info{job="kube-state-metrics", host_ip!=""} - ) by (node, host_ip) - * on (host_ip) group_right (node) - label_replace( - ( - max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) - by (instance) - ), "host_ip", "$1", "instance", "(.*):.*" - ) - ) by (node) - record: "node:node_inodes_free:" - ## NOTE changing the serviceMonitor scrape interval to be >1m can result in metrics from recording ## rules to be missing and empty panels in Sumo Logic Kubernetes apps. 
kubeApiServer: diff --git a/examples/kube_prometheus_stack/values-prometheus.yaml b/examples/kube_prometheus_stack/values-prometheus.yaml index 15006b50c1..87b8c685d6 100644 --- a/examples/kube_prometheus_stack/values-prometheus.yaml +++ b/examples/kube_prometheus_stack/values-prometheus.yaml @@ -16,198 +16,6 @@ ## Labels to apply to all kube-prometheus-stack resources commonLabels: {} -## k8s pre-1.14 prometheus recording rules -additionalPrometheusRulesMap: - pre-1.14-node-rules: - groups: - - name: node-pre-1.14.rules - rules: - - expr: sum(min(kube_pod_info) by (node)) - record: ":kube_pod_info_node_count:" - - expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])) - record: :node_cpu_utilisation:avg1m - - expr: |- - 1 - avg by (node) ( - rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info:) - record: node:node_cpu_utilisation:avg1m - - expr: |- - 1 - - sum( - node_memory_MemFree_bytes{job="node-exporter"} + - node_memory_Cached_bytes{job="node-exporter"} + - node_memory_Buffers_bytes{job="node-exporter"} - ) - / - sum(node_memory_MemTotal_bytes{job="node-exporter"}) - record: ":node_memory_utilisation:" - - expr: |- - sum by (node) ( - ( - node_memory_MemFree_bytes{job="node-exporter"} + - node_memory_Cached_bytes{job="node-exporter"} + - node_memory_Buffers_bytes{job="node-exporter"} - ) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_memory_bytes_available:sum - - expr: |- - (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) - / - node:node_memory_bytes_total:sum - record: node:node_memory_utilisation:ratio - - expr: |- - 1 - - sum by (node) ( - ( - node_memory_MemFree_bytes{job="node-exporter"} + - node_memory_Cached_bytes{job="node-exporter"} + - node_memory_Buffers_bytes{job="node-exporter"} - ) - * on (namespace, pod) group_left(node) - 
node_namespace_pod:kube_pod_info: - ) - / - sum by (node) ( - node_memory_MemTotal_bytes{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: "node:node_memory_utilisation:" - - expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum) - record: "node:node_memory_utilisation_2:" - - expr: |- - max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} - - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) - / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) - record: "node:node_filesystem_usage:" - - expr: |- - sum by (node) ( - node_memory_MemTotal_bytes{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_memory_bytes_total:sum - - expr: |- - sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + - sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) - record: :node_net_utilisation:sum_irate - - expr: |- - sum by (node) ( - (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) + - irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_net_utilisation:sum_irate - - expr: |- - sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + - sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) - record: :node_net_saturation:sum_irate - - expr: |- - sum by (node) ( - (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) + - irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_net_saturation:sum_irate - - 
expr: |- - sum(node_load1{job="node-exporter"}) - / - sum(node:node_num_cpu:sum) - record: ":node_cpu_saturation_load1:" - - expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) - record: :node_disk_saturation:avg_irate - - expr: |- - avg by (node) ( - irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_disk_saturation:avg_irate - - expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) - record: :node_disk_utilisation:avg_irate - - expr: |- - avg by (node) ( - irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_disk_utilisation:avg_irate - - expr: |- - 1e3 * sum( - (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) - + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) - ) - record: :node_memory_swap_io_bytes:sum_rate - - expr: |- - 1e3 * sum by (node) ( - (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) - + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_memory_swap_io_bytes:sum_rate - - expr: |- - node:node_cpu_utilisation:avg1m - * - node:node_num_cpu:sum - / - scalar(sum(node:node_num_cpu:sum)) - record: node:cluster_cpu_utilisation:ratio - - expr: |- - (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) - / - scalar(sum(node:node_memory_bytes_total:sum)) - record: node:cluster_memory_utilisation:ratio - - expr: |- - sum by (node) ( - node_load1{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - / - node:node_num_cpu:sum - 
record: "node:node_cpu_saturation_load1:" - - expr: |- - max by (instance, namespace, pod, device) ( - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} - / - node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} - ) - record: "node:node_filesystem_avail:" - - expr: |- - max( - max( - kube_pod_info{job="kube-state-metrics", host_ip!=""} - ) by (node, host_ip) - * on (host_ip) group_right (node) - label_replace( - ( - max(node_filesystem_files{job="node-exporter", mountpoint="/"}) - by (instance) - ), "host_ip", "$1", "instance", "(.*):.*" - ) - ) by (node) - record: "node:node_inodes_total:" - - expr: |- - max( - max( - kube_pod_info{job="kube-state-metrics", host_ip!=""} - ) by (node, host_ip) - * on (host_ip) group_right (node) - label_replace( - ( - max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) - by (instance) - ), "host_ip", "$1", "instance", "(.*):.*" - ) - ) by (node) - record: "node:node_inodes_free:" - ## NOTE changing the serviceMonitor scrape interval to be >1m can result in metrics from recording ## rules to be missing and empty panels in Sumo Logic Kubernetes apps. 
kubeApiServer: diff --git a/tests/integration/internal/constants.go b/tests/integration/internal/constants.go index dc38b146f4..5b87ebc22d 100644 --- a/tests/integration/internal/constants.go +++ b/tests/integration/internal/constants.go @@ -321,38 +321,7 @@ var ( "fluentd_output_status_slow_flush_count", "fluentd_output_status_write_count", } - RecordingRuleMetrics = []string{ - ":kube_pod_info_node_count:", - "node:node_num_cpu:sum", - "node_namespace_pod:kube_pod_info:", - "node:node_cpu_utilisation:avg1m", - ":node_memory_utilisation:", - // "node:node_memory_bytes_available:sum", // not present, depends on other recording rules that don't exist - "node:node_memory_utilisation:ratio", - "node:node_memory_utilisation:", - "node:node_memory_utilisation_2:", - "node:node_filesystem_usage:", - "node:node_memory_bytes_total:sum", - ":node_net_utilisation:sum_irate", - "node:node_net_utilisation:sum_irate", - ":node_net_saturation:sum_irate", - "node:node_net_saturation:sum_irate", - ":node_cpu_utilisation:avg1m", - ":node_cpu_saturation_load1:", - ":node_disk_saturation:avg_irate", - "node:node_disk_saturation:avg_irate", - ":node_disk_utilisation:avg_irate", - "node:node_disk_utilisation:avg_irate", - ":node_memory_swap_io_bytes:sum_rate", - "node:node_memory_swap_io_bytes:sum_rate", - "node:cluster_cpu_utilisation:ratio", - "node:cluster_memory_utilisation:ratio", - "node:node_cpu_saturation_load1:", - "node:node_filesystem_avail:", - // "node:node_inodes_total:", // looks like we're not collecting node_filesystem_files which this requires - // "node:node_inodes_free:", // looks like we're not collecting node_filesystem_files_free which this requires - "instance:node_network_receive_bytes:rate:sum", - } + OtherMetrics = []string{ "up", } @@ -414,7 +383,6 @@ var ( CAdvisorMetrics, NodeExporterMetrics, PrometheusMetrics, - RecordingRuleMetrics, OtherMetrics, } DefaultExpectedNginxAnnotatedMetricsGroups = [][]string{