From 4fa655c96bf9c6a88e3e33c1a4b6238b27001e53 Mon Sep 17 00:00:00 2001 From: Francois Ferrand Date: Tue, 5 Sep 2023 14:44:34 +0200 Subject: [PATCH] Fix dashboard at short/default scale `$__interval` does not work well when the interval is too short. Especially with the default (1h) interval, no data is displayed in the panels: it is better to use `$__rate_interval` instead, which adapts to both the current time range and granularity. Issue: BB-435 --- monitoring/lifecycle/dashboard.json | 64 ++++++++++++++--------------- monitoring/lifecycle/dashboard.py | 6 +-- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/monitoring/lifecycle/dashboard.json b/monitoring/lifecycle/dashboard.json index dd5e14543..49b2d92a6 100644 --- a/monitoring/lifecycle/dashboard.json +++ b/monitoring/lifecycle/dashboard.json @@ -218,7 +218,7 @@ "targets": [ { "datasource": null, - "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"2..\"}[$__interval]))", + "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"2..\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -226,14 +226,14 @@ "intervalFactor": 1, "legendFormat": "HTTP 2xx", "metric": "", - "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"2..\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"2..\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" }, { "datasource": null, - "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"3..\"}[$__interval]))", + "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"3..\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -241,14 +241,14 @@ "intervalFactor": 1, "legendFormat": "HTTP 3xx", "metric": "", - "query": 
"sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"3..\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"3..\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" }, { "datasource": null, - "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"4..\"}[$__interval]))", + "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"4..\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -256,14 +256,14 @@ "intervalFactor": 1, "legendFormat": "HTTP 4xx", "metric": "", - "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"4..\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"4..\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" }, { "datasource": null, - "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"5..\"}[$__interval]))", + "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"5..\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -271,7 +271,7 @@ "intervalFactor": 1, "legendFormat": "HTTP 5xx", "metric": "", - "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"5..\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"5..\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" @@ -702,7 +702,7 @@ "targets": [ { "datasource": null, - "expr": "sum(increase(s3_lifecycle_kafka_publish_success_total{op=\"BucketTopic\",namespace=\"${namespace}\"}[$__interval]))", + "expr": "sum(increase(s3_lifecycle_kafka_publish_success_total{op=\"BucketTopic\",namespace=\"${namespace}\"}[$__rate_interval]))", "format": "time_series", "hide": 
false, "instant": false, @@ -710,7 +710,7 @@ "intervalFactor": 1, "legendFormat": "messages", "metric": "", - "query": "sum(increase(s3_lifecycle_kafka_publish_success_total{op=\"BucketTopic\",namespace=\"${namespace}\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_kafka_publish_success_total{op=\"BucketTopic\",namespace=\"${namespace}\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" @@ -788,7 +788,7 @@ "targets": [ { "datasource": null, - "expr": "sum(increase(s3_lifecycle_kafka_publish_error_total{op=\"BucketTopic\",namespace=\"${namespace}\"}[$__interval]))", + "expr": "sum(increase(s3_lifecycle_kafka_publish_error_total{op=\"BucketTopic\",namespace=\"${namespace}\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -796,7 +796,7 @@ "intervalFactor": 1, "legendFormat": "messages", "metric": "", - "query": "sum(increase(s3_lifecycle_kafka_publish_error_total{op=\"BucketTopic\",namespace=\"${namespace}\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_kafka_publish_error_total{op=\"BucketTopic\",namespace=\"${namespace}\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" @@ -874,7 +874,7 @@ "targets": [ { "datasource": null, - "expr": "sum(increase(s3_lifecycle_kafka_publish_success_total{op=\"ObjectTopic\",namespace=\"${namespace}\"}[$__interval]))", + "expr": "sum(increase(s3_lifecycle_kafka_publish_success_total{op=\"ObjectTopic\",namespace=\"${namespace}\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -882,7 +882,7 @@ "intervalFactor": 1, "legendFormat": "messages", "metric": "", - "query": "sum(increase(s3_lifecycle_kafka_publish_success_total{op=\"ObjectTopic\",namespace=\"${namespace}\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_kafka_publish_success_total{op=\"ObjectTopic\",namespace=\"${namespace}\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" @@ -960,7 +960,7 @@ "targets": [ { "datasource": null, - "expr": 
"sum(increase(s3_lifecycle_kafka_publish_error_total{op=\"ObjectTopic\",namespace=\"${namespace}\"}[$__interval]))", + "expr": "sum(increase(s3_lifecycle_kafka_publish_error_total{op=\"ObjectTopic\",namespace=\"${namespace}\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -968,7 +968,7 @@ "intervalFactor": 1, "legendFormat": "messages", "metric": "", - "query": "sum(increase(s3_lifecycle_kafka_publish_error_total{op=\"ObjectTopic\",namespace=\"${namespace}\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_kafka_publish_error_total{op=\"ObjectTopic\",namespace=\"${namespace}\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" @@ -1289,7 +1289,7 @@ "targets": [ { "datasource": null, - "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"2..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__interval]))", + "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"2..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -1297,14 +1297,14 @@ "intervalFactor": 1, "legendFormat": "HTTP 2xx", "metric": "", - "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"2..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"2..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" }, { "datasource": null, - "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"3..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__interval]))", + "expr": 
"sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"3..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -1312,14 +1312,14 @@ "intervalFactor": 1, "legendFormat": "HTTP 3xx", "metric": "", - "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"3..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"3..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" }, { "datasource": null, - "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"4..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__interval]))", + "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"4..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -1327,14 +1327,14 @@ "intervalFactor": 1, "legendFormat": "HTTP 4xx", "metric": "", - "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"4..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"4..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" }, { "datasource": null, - "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"5..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__interval]))", + "expr": 
"sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"5..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -1342,7 +1342,7 @@ "intervalFactor": 1, "legendFormat": "HTTP 5xx", "metric": "", - "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"5..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"5..\",job=\"${job_lifecycle_bucket_processor}\",origin=\"bucket\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" @@ -1894,7 +1894,7 @@ "targets": [ { "datasource": null, - "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"2..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__interval]))", + "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"2..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -1902,14 +1902,14 @@ "intervalFactor": 1, "legendFormat": "HTTP 2xx", "metric": "", - "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"2..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"2..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" }, { "datasource": null, - "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"3..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__interval]))", + "expr": 
"sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"3..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -1917,14 +1917,14 @@ "intervalFactor": 1, "legendFormat": "HTTP 3xx", "metric": "", - "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"3..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"3..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" }, { "datasource": null, - "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"4..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__interval]))", + "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"4..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -1932,14 +1932,14 @@ "intervalFactor": 1, "legendFormat": "HTTP 4xx", "metric": "", - "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"4..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"4..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" }, { "datasource": null, - "expr": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"5..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__interval]))", + "expr": 
"sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"5..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__rate_interval]))", "format": "time_series", "hide": false, "instant": false, @@ -1947,7 +1947,7 @@ "intervalFactor": 1, "legendFormat": "HTTP 5xx", "metric": "", - "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"5..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__interval]))", + "query": "sum(increase(s3_lifecycle_s3_operations_total{namespace=\"${namespace}\",status=~\"5..\",job=\"${job_lifecycle_object_processor}\",origin=\"expiration\"}[$__rate_interval]))", "refId": "", "step": 10, "target": "" diff --git a/monitoring/lifecycle/dashboard.py b/monitoring/lifecycle/dashboard.py index 38468f5f4..10cabe752 100644 --- a/monitoring/lifecycle/dashboard.py +++ b/monitoring/lifecycle/dashboard.py @@ -35,7 +35,7 @@ def s3_request_timeseries_expr(process, job, code): if process is not None: labelSelector += f',origin="{process}"' - return f'sum(increase(s3_lifecycle_s3_operations_total{{{labelSelector}}}[$__interval]))' + return f'sum(increase(s3_lifecycle_s3_operations_total{{{labelSelector}}}[$__rate_interval]))' def s3_request_timeseries(title, process=None, job=None): @@ -157,11 +157,11 @@ def kafka_row(topic, op): return [ kafka_messages_time_series( f'{topic} Messages in Queue', - f'sum(increase(s3_lifecycle_kafka_publish_success_total{{{label}}}[$__interval]))', + f'sum(increase(s3_lifecycle_kafka_publish_success_total{{{label}}}[$__rate_interval]))', ), kafka_messages_time_series( f'{topic} Failed Messages', - f'sum(increase(s3_lifecycle_kafka_publish_error_total{{{label}}}[$__interval]))', + f'sum(increase(s3_lifecycle_kafka_publish_error_total{{{label}}}[$__rate_interval]))', ), ]