From b2654f0600c6dd8feb74036ebd8eeb1115125015 Mon Sep 17 00:00:00 2001 From: Kamikaza731 Date: Sat, 14 Dec 2024 12:22:41 +0100 Subject: [PATCH 1/3] Small change turn all prometheus metrics to gauge --- README.md | 2 +- tnom/main.py | 2 +- tnom/prometheus_client_endpoint.py | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 352bfa4..13c8062 100644 --- a/README.md +++ b/README.md @@ -372,7 +372,7 @@ docker run -d \ -v $(pwd)/config:/app/config \ -v $(pwd)/chain_database:/app/chain_database \ --name tnom \ - tnom:v0.5.1 + tnom:v0.5.2 ``` # Prometheus metrics diff --git a/tnom/main.py b/tnom/main.py index 5e9da7f..a7afc4a 100644 --- a/tnom/main.py +++ b/tnom/main.py @@ -419,7 +419,7 @@ def setup_argument_parser() -> argparse.ArgumentParser: parser.add_argument( "--version", action="version", - version="v0.5.1", + version="v0.5.2", ) parser.add_argument( diff --git a/tnom/prometheus_client_endpoint.py b/tnom/prometheus_client_endpoint.py index 750e121..ccf4a08 100644 --- a/tnom/prometheus_client_endpoint.py +++ b/tnom/prometheus_client_endpoint.py @@ -18,7 +18,7 @@ import hypercorn.config from database_handler import read_current_epoch_data, read_last_recorded_epoch from fastapi import FastAPI -from prometheus_client import Counter, Gauge, make_asgi_app +from prometheus_client import Gauge, make_asgi_app class PrometheusMetrics: @@ -54,17 +54,17 @@ def __init__(self, db_path: Path, epoch: int) -> None: "Total number of miss counter events", ) - self.miss_counter_events_p1_executed = Counter( + self.miss_counter_events_p1_executed = Gauge( f"{namespace}_miss_counter_events_p1_executed", "P1 alert executed", ) - self.miss_counter_events_p2_executed = Counter( + self.miss_counter_events_p2_executed = Gauge( f"{namespace}_miss_counter_events_p2_executed", "P2 alert executed", ) - self.miss_counter_events_p3_executed = Counter( + self.miss_counter_events_p3_executed = Gauge( 
f"{namespace}_miss_counter_events_p3_executed", "P3 alert executed", ) @@ -79,12 +79,12 @@ def __init__(self, db_path: Path, epoch: int) -> None: "Price feed wallet unibi balance", ) - self.small_balance_alert = Counter( + self.small_balance_alert = Gauge( f"{namespace}_small_balance_alert_executed", "Small balance alert executed", ) - self.very_small_balance_alert = Counter( + self.very_small_balance_alert = Gauge( f"{namespace}_very_small_balance_alert_executed", "Very small balance alert executed", ) From 95384079657e3f12e41be520769241a2add0d919 Mon Sep 17 00:00:00 2001 From: Kamikaza731 Date: Sat, 14 Dec 2024 17:30:40 +0100 Subject: [PATCH 2/3] Added grafana json --- CHANGELOG.md | 17 +- README.md | 34 +- grafana_basic_dashboard.json | 862 +++++++++++++++++++++++++++++++++++ 3 files changed, 911 insertions(+), 2 deletions(-) create mode 100644 grafana_basic_dashboard.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 701ae14..dc62db0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,22 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.5.2] - 2024-12-14 + +### Added + +- Grafana json with the basic dashboard + +### Changed + +- Changed some prometheus metrics to gauge, all metrics related to tnom are now gauges + +## [0.5.1] - 2024-12-14 + +### Fixed + +- A bug that caused problem updating the database +- Bug where Prometheus sometimes wouldn't update (also related to the database) ## [0.5.0] - 2024-12-11 diff --git a/README.md b/README.md index 13c8062..c1e0f7a 100644 --- a/README.md +++ b/README.md @@ -378,4 +378,36 @@ docker run -d \ # Prometheus metrics Prometheus metrics are available at http://localhost:7130/metrics by default. -Should add soon whole metrics page when some parameters are adjusted. 
+ ## Script/Program metrics + +| Metric | Description | +| --- | --- | +| python_gc_objects_collected_total | Tracks the number of objects collected by Python's garbage collector +Broken down by generation (0, 1, 2) | +| python_gc_objects_uncollectable_total | Tracks the number of objects that cannot be collected by Python's garbage collector | +| python_gc_collections_total | Number of times garbage collection was run for each generation | +| python_info | Information about the Python interpreter | +| process_virtual_memory_bytes | Total virtual memory used by the process +Includes all memory allocated, including RAM and swap | +| process_resident_memory_bytes | Total physical memory (RAM) used by the process | +| process_start_time_seconds | Unix timestamp when the process started | +| process_cpu_seconds_total | Total user and system CPU time spent in seconds | +| process_open_fds | Number of open file descriptors | +| process_max_fds | Maximum number of file descriptors that this process can open | + +## TNOM metrics + +| Metric | Description | +| --- | --- | +| tnom_slash_epoch | Current epoch | +| tnom_api_miss_counter_events | In the event that the APIs are malfunctioning, this parameter will grow. Indicates the number of consecutive API failures. Most likely the chain or the APIs are down. | +| tnom_unsigned_oracle_events | Total number of unsigned oracle events. When the price feeder doesn't sign the transaction, this parameter will grow. | +| tnom_consecutive_misses | Number of consecutive misses of unsigned events. 
Grows when the price feeder doesn't sign the transaction consecutively | +| tnom_price_feed_balance | Price feed wallet in unibi balance | +| tnom_small_balance_alert_executed | Alert executed when the price feed wallet balance is less than 1 NIBI | +| tnom_very_small_balance_alert_executed | Alert executed when the price feed wallet balance is less than 0.1 NIBI | +| tnom_miss_counter_events | Total number of miss counter events (* not tested, probably related to the price feeder missing price targets) | +| tnom_miss_counter_events_p1_executed | P1 alert executed | +| tnom_miss_counter_events_p2_executed | P2 alert executed | +| tnom_miss_counter_events_p3_executed | P3 alert executed | \ No newline at end of file diff --git a/grafana_basic_dashboard.json b/grafana_basic_dashboard.json new file mode 100644 index 0000000..75e3508 --- /dev/null +++ b/grafana_basic_dashboard.json @@ -0,0 +1,862 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "TNOM is a monitoring tool for checking Nibiru price feeder if it is signing all the necessary Nibiru Oracle price updates", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 3, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": false, + "keepTime": false, + "tags": [], + "targetBlank": true, + "title": "TNOM - Github repo", + "tooltip": "Open Github repo", + "type": "link", + "url": "https://github.com/Cogwheel-Validator/tnom" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 11, + "panels": [], + "title": "Basic", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ee2ym7o4ds0sge" + }, + "description": "Wallet balance of the price feeder", + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "displayName": "NIBI", + "mappings": [], + "max": 10, + "min": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-red", + "value": 0.1 + }, + { + "color": "red", + "value": 1 + }, + { + "color": "#EAB839", + "value": 2 + }, + { + "color": "super-light-yellow", + "value": 3 + }, + { + "color": "light-green", + "value": 4 + }, + { + "color": "semi-dark-green", + "value": 5 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto", + "text": {} + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "tnom_price_feed_balance{job=\"$job_name\"} / 1000000", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Price feeder wallet balance", + "type": "gauge" + }, + { + "datasource": { + "uid": "ee2ym7o4ds0sge" + }, + "description": "Showing current epoch", + "fieldConfig": { + "defaults": { + "color": { + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 3, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true 
+ }, + "pluginVersion": "11.4.0", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "tnom_slash_epoch{job=\"$job_name\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Current Epoch", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ee2ym7o4ds0sge" + }, + "description": "It shows the number of times the price feeder didn't sign oracle price update transactions", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 49, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "orange", + "value": 10 + }, + { + "color": "red", + "value": 20 + }, + { + "color": "semi-dark-red", + "value": 40 + }, + { + "color": "dark-red", + "value": 50 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 4, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "tnom_unsigned_oracle_events{job=\"$job_name\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Unsigned Events", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ee2ym7o4ds0sge" + }, + "description": "This gauge differs from unsigned events. It is for monitoring whether the price feeder missed a price point. 
", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 50, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "super-light-green", + "value": 10 + }, + { + "color": "yellow", + "value": 20 + }, + { + "color": "orange", + "value": 30 + }, + { + "color": "red", + "value": 40 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 10, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "tnom_miss_counter_events{job=\"$job_name\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Miss events", + "type": "gauge" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 12, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "ee2ym7o4ds0sge" + }, + "description": "No APIs working for X amount of checks", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 10, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 8 + }, + "id": 1, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": 
false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ee2ym7o4ds0sge" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "tnom_api_cons_miss{job=\"$job_name\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "No APIs working", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ee2ym7o4ds0sge" + }, + "description": "True if executed false if not executed", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "bool" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 8 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "tnom_miss_counter_events_p1_executed{job=\"$job_name\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Executed P1 Alert for miss counter", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ee2ym7o4ds0sge" + }, + "description": "True if executed false if not executed", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 1, 
+ "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "bool" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 8 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "tnom_miss_counter_events_p2_executed{job=\"$job_name\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Executed P2 Alert for miss counter", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ee2ym7o4ds0sge" + }, + "description": "True if executed false if not executed", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "bool" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 8 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "tnom_miss_counter_events_p3_executed{job=\"$job_name\"}", + "fullMetaSearch": 
false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Executed P3 Alert for miss counter", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ee2ym7o4ds0sge" + }, + "description": "True if executed false if not executed", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "bool" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 13 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "tnom_very_small_balance_alert_executed{job=\"$job_name\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Executed very small balance alert", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ee2ym7o4ds0sge" + }, + "description": "True if executed false if not executed", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "bool" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 13 + }, + "id": 9, + "options": { + "colorMode": "value", + 
"graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "tnom_small_balance_alert_executed{job=\"$job_name\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Executed Small balance Alert ", + "type": "stat" + } + ], + "title": "Alerts", + "type": "row" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "tnom", + "value": "tnom" + }, + "definition": "label_values({instance=\"localhost:7130\"},job)", + "label": "job name", + "name": "job_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values({instance=\"localhost:7130\"},job)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "TNOM", + "uid": "fe6txgrmczz0gf", + "version": 36, + "weekStart": "" + } \ No newline at end of file From 512f9aab2b60dff83f1e93619c30cfd1c8ce09a7 Mon Sep 17 00:00:00 2001 From: Kamikaza731 Date: Sat, 14 Dec 2024 17:37:35 +0100 Subject: [PATCH 3/3] Update project version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6078a9b..13b8723 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "tnom" -version = "0.5.0" +version = "0.5.2" description = "The Nibiru Oracle Monitoring is a tool for monitoring signer wallet for Nibiru Oracle." 
authors = ["Kamikaza731"] readme = "README.md"