diff --git a/batcher/aligned-batcher/src/lib.rs b/batcher/aligned-batcher/src/lib.rs index e5588d4e3..9c4f336e5 100644 --- a/batcher/aligned-batcher/src/lib.rs +++ b/batcher/aligned-batcher/src/lib.rs @@ -16,6 +16,7 @@ use tokio::time::{timeout, Instant}; use types::batch_state::BatchState; use types::user_state::UserState; +use batch_queue::calculate_batch_size; use std::collections::HashMap; use std::env; use std::net::SocketAddr; @@ -1043,10 +1044,13 @@ impl Batcher { BatchQueueEntryPriority::new(max_fee, nonce), ); - info!( - "Current batch queue length: {}", - batch_state_lock.batch_queue.len() - ); + // Update metrics + let queue_len = batch_state_lock.batch_queue.len(); + let queue_size_bytes = calculate_batch_size(&batch_state_lock.batch_queue)?; + self.metrics + .update_queue_metrics(queue_len as i64, queue_size_bytes as i64); + + info!("Current batch queue length: {}", queue_len); let mut proof_submitter_addr = proof_submitter_addr; @@ -1226,6 +1230,13 @@ impl Batcher { ))?; } + // Update metrics + let queue_len = batch_state_lock.batch_queue.len(); + let queue_size_bytes = calculate_batch_size(&batch_state_lock.batch_queue)?; + + self.metrics + .update_queue_metrics(queue_len as i64, queue_size_bytes as i64); + Ok(()) } @@ -1373,6 +1384,8 @@ impl Batcher { batch_state_lock .user_states .insert(nonpaying_replacement_addr, nonpaying_user_state); + + self.metrics.update_queue_metrics(0, 0); } /// Receives new block numbers, checks if conditions are met for submission and diff --git a/batcher/aligned-batcher/src/metrics.rs b/batcher/aligned-batcher/src/metrics.rs index dccab58f3..267c1de1e 100644 --- a/batcher/aligned-batcher/src/metrics.rs +++ b/batcher/aligned-batcher/src/metrics.rs @@ -19,6 +19,8 @@ pub struct BatcherMetrics { pub batcher_started: IntCounter, pub gas_price_used_on_latest_batch: IntGauge, pub broken_ws_connections: IntCounter, + pub queue_len: IntGauge, + pub queue_size_bytes: IntGauge, pub s3_duration: IntGauge, pub create_new_task_duration: IntGauge, pub cancel_create_new_task_duration: IntGauge, @@ -49,6 +51,11 @@ impl BatcherMetrics { "broken_ws_connections_count", "Broken websocket connections" ))?; + let queue_len = register_int_gauge!(opts!("queue_len", "Amount of proofs in the queue"))?; + let queue_size_bytes = register_int_gauge!(opts!( + "queue_size_bytes", + "Accumulated size in bytes of all proofs in the queue" + ))?; let s3_duration = register_int_gauge!(opts!("s3_duration", "S3 Duration"))?; let create_new_task_duration = register_int_gauge!(opts!( "create_new_task_duration", @@ -68,6 +75,8 @@ impl BatcherMetrics { registry.register(Box::new(gas_price_used_on_latest_batch.clone()))?; registry.register(Box::new(batcher_started.clone()))?; registry.register(Box::new(broken_ws_connections.clone()))?; + registry.register(Box::new(queue_len.clone()))?; + registry.register(Box::new(queue_size_bytes.clone()))?; registry.register(Box::new(s3_duration.clone()))?; registry.register(Box::new(create_new_task_duration.clone()))?; registry.register(Box::new(cancel_create_new_task_duration.clone()))?; @@ -92,6 +101,8 @@ impl BatcherMetrics { batcher_started, gas_price_used_on_latest_batch, broken_ws_connections, + queue_len, + queue_size_bytes, s3_duration, create_new_task_duration, cancel_create_new_task_duration, @@ -124,4 +135,9 @@ impl BatcherMetrics { pub fn user_error(&self, label_values: &[&str]) { self.user_errors.with_label_values(label_values).inc(); } + + pub fn update_queue_metrics(&self, queue_len: i64, queue_size: i64) { + self.queue_len.set(queue_len); + self.queue_size_bytes.set(queue_size); + } } diff --git a/grafana/provisioning/dashboards/aligned/aggregator_batcher.json b/grafana/provisioning/dashboards/aligned/aggregator_batcher.json index e11a694a3..0981afa67 100644 --- a/grafana/provisioning/dashboards/aligned/aggregator_batcher.json +++ b/grafana/provisioning/dashboards/aligned/aggregator_batcher.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 2, + "id": 5, "links": [], "liveNow": false, "panels": [ @@ -1330,31 +1330,6 @@ "title": "Total Batcher Restarts", "type": "timeseries" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 39, - "options": { - "code": { - "language": "plaintext", - "showLineNumbers": false, - "showMiniMap": false - }, - "content": "

\n SYSTEM STATUS\n

", - "mode": "html" - }, - "pluginVersion": "10.1.10", - "transparent": true, - "type": "text" - }, { "datasource": { "type": "prometheus", @@ -1364,7 +1339,7 @@ "h": 2, "w": 12, "x": 0, - "y": 34 + "y": 33 }, "id": 36, "options": { @@ -1389,7 +1364,7 @@ "h": 2, "w": 12, "x": 12, - "y": 34 + "y": 33 }, "id": 37, "options": { @@ -1440,7 +1415,7 @@ "h": 6, "w": 3, "x": 0, - "y": 36 + "y": 35 }, "id": 14, "options": { @@ -1544,7 +1519,7 @@ "h": 6, "w": 9, "x": 3, - "y": 36 + "y": 35 }, "id": 13, "options": { @@ -1608,7 +1583,7 @@ "h": 6, "w": 3, "x": 12, - "y": 36 + "y": 35 }, "id": 7, "options": { @@ -1712,7 +1687,7 @@ "h": 6, "w": 9, "x": 15, - "y": 36 + "y": 35 }, "id": 40, "options": { @@ -1784,7 +1759,7 @@ "h": 6, "w": 3, "x": 0, - "y": 42 + "y": 41 }, "id": 15, "options": { @@ -1888,7 +1863,7 @@ "h": 6, "w": 9, "x": 3, - "y": 42 + "y": 41 }, "id": 19, "options": { @@ -1951,7 +1926,7 @@ "h": 6, "w": 3, "x": 12, - "y": 42 + "y": 41 }, "id": 5, "options": { @@ -2056,7 +2031,7 @@ "h": 6, "w": 9, "x": 15, - "y": 42 + "y": 41 }, "id": 1, "options": { @@ -2128,7 +2103,7 @@ "h": 6, "w": 3, "x": 0, - "y": 48 + "y": 47 }, "id": 22, "options": { @@ -2232,7 +2207,7 @@ "h": 6, "w": 9, "x": 3, - "y": 48 + "y": 47 }, "id": 21, "options": { @@ -2331,7 +2306,7 @@ "h": 6, "w": 10, "x": 12, - "y": 48 + "y": 47 }, "id": 25, "interval": "36", @@ -2373,7 +2348,78 @@ "type": "prometheus", "uid": "prometheus" }, - "description": "Measures websocket connections that were abnormally disconnected.", + "description": "Number of proofs in the queue", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 0, + "y": 53 + }, + "id": 52, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.1.10", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "queue_len", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Number of proofs in the queue", + "range": true, + "refId": "Queue Length", + "useBackend": false + } + ], + "title": "Queue Length", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Number of proofs in the queue", "fieldConfig": { "defaults": { "color": { @@ -2396,7 +2442,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 1, + "pointSize": 5, "scaleDistribution": { "type": "linear" }, @@ -2417,10 +2463,6 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] } @@ -2429,18 +2471,17 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 54 + "w": 9, + "x": 3, + "y": 53 }, - "id": 20, - "interval": "1m", + "id": 51, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", - "showLegend": true + "showLegend": false }, "tooltip": { "mode": "single", @@ -2455,17 +2496,17 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "floor(increase(broken_ws_connections_count{job=\"aligned-batcher\"}[10y]))", + "expr": "queue_len", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "__auto", + "legendFormat": "Queue Length", "range": true, - "refId": "A", + "refId": "Queue Length", "useBackend": false } ], - "title": "Broken websocket connections", + "title": "Queue Length", "type": "timeseries" }, { @@ -2552,7 +2593,7 @@ "h": 7, "w": 10, "x": 12, - "y": 54 + "y": 53 }, "id": 28, "options": { @@ -2588,6 +2629,274 @@ "title": "# Times Aggregator Paid Extra Cost", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Sum of all proof sizes in the queue", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 0, + "y": 60 + }, + "id": 50, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.1.10", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "queue_size_bytes", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Size in bytes of the queue", + "range": true, + "refId": "Queue Size", + "useBackend": false + } + ], + "title": "Queue Size", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Sum of all proof sizes in the queue", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 9, + "x": 3, + "y": 60 + }, + "id": 49, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "queue_size_bytes", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Queue Size", + "range": true, + "refId": "Queue Size", + "useBackend": false + } + ], + "title": "Queue Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Measures websocket connections that were abnormally disconnected.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 67 + }, + "id": 20, + "interval": "1m", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "floor(increase(broken_ws_connections_count{job=\"aligned-batcher\"}[10y]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Broken websocket connections", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", @@ -2650,7 +2959,7 @@ "h": 8, "w": 12, "x": 0, - "y": 61 + "y": 74 }, "id": 24, "options": { @@ -2730,7 +3039,7 @@ "h": 2, "w": 24, "x": 0, - "y": 69 + "y": 82 }, "id": 46, "options": { @@ -2805,7 +3114,7 @@ "h": 8, "w": 12, "x": 0, - "y": 71 + "y": 84 }, "id": 47, "options": { @@ -2905,7 +3214,7 @@ "h": 8, "w": 12, "x": 12, - "y": 71 + "y": 84 }, "id": 43, "interval": "1s", @@ -3002,7 +3311,7 @@ "h": 8, "w": 12, "x": 0, - "y": 79 + "y": 92 }, "id": 45, "options": { @@ -3133,7 +3442,7 @@ "h": 8, "w": 12, "x": 12, - "y": 79 + "y": 92 }, "id": 44, "interval": "1s", @@ -3168,7 +3477,7 @@ "type": "timeseries" } ], - "refresh": "", + "refresh": "30s", "schemaVersion": 38, "style": "dark", "tags": [], @@ -3183,6 +3492,6 @@ "timezone": "browser", "title": "System Data", "uid": "aggregator", - "version": 7, + "version": 38, "weekStart": "" }