Skip to content

Commit

Permalink
Initial metrics refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
njgheorghita committed Sep 28, 2023
1 parent 5b385c9 commit 22ac097
Show file tree
Hide file tree
Showing 17 changed files with 579 additions and 393 deletions.
6 changes: 6 additions & 0 deletions book/src/users/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,12 @@ cargo run -p trin -- \
--web3-transport http
```

### Updating metrics dashboard
If there are new changes to the metrics dashboard template that you want to view in
an already-existing dashboard, the simplest way to update your dashboard is to delete
your `prometheus` datasource and `Trin App metrics` dashboard, and re-run the
`create-dashboard` command.

### View metrics remotely
Trin metrics on a remote machine can be monitored by listening to the grafana
address on a local machine.
Expand Down
2 changes: 2 additions & 0 deletions ethportal-api/src/dashboard/grafana.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ pub struct GrafanaAPI {
address: String,
}

// todo: automatically update datasource/dashboard via `create-dashboard` command
// rather than deleting and recreating them
impl GrafanaAPI {
pub fn new(username: String, password: String, address: String) -> Self {
let basic_auth_string = format!("{username}:{password}");
Expand Down
Empty file added portalnet/src/config.rs
Empty file.
1 change: 1 addition & 0 deletions portalnet/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![warn(clippy::unwrap_used)]

pub mod config;
pub mod discovery;
pub mod events;
pub mod find;
Expand Down
1 change: 1 addition & 0 deletions portalnet/src/metrics/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod labels;
pub mod overlay;
pub mod storage;
310 changes: 164 additions & 146 deletions portalnet/src/metrics/overlay.rs

Large diffs are not rendered by default.

147 changes: 147 additions & 0 deletions portalnet/src/metrics/storage.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
use ethportal_api::types::distance::Distance;
use prometheus_exporter::{
self,
prometheus::{
opts, register_gauge_vec_with_registry, register_int_gauge_vec_with_registry, GaugeVec,
IntGaugeVec, Registry,
},
};

/// Handles to the Prometheus gauges that describe content storage.
///
/// Every gauge is registered with a single `protocol` label, so one
/// `StorageMetrics` can carry readings for several protocols at once.
#[derive(Clone, Debug)]
pub struct StorageMetrics {
/// Sum of the sizes of the individual content items stored, in bytes.
pub content_storage_usage_bytes: GaugeVec,
/// Full on-disk database size, in bytes.
pub total_storage_usage_bytes: GaugeVec,
/// User-defined limit on storage usage, in bytes.
pub storage_capacity_bytes: GaugeVec,
/// Fraction of the whole data ring covered by the data radius.
pub radius_ratio: GaugeVec,
/// Total number of storage entries.
pub entry_count: IntGaugeVec,
}

/// Number of bytes in one megabyte (decimal: 1000 * 1000), as an `f64` so that
/// byte-valued gauge readings can be divided by it directly.
const BYTES_IN_MB_F64: f64 = 1000.0 * 1000.0;

impl StorageMetrics {
/// Registers all storage gauges with `registry` and returns their handles.
///
/// Every gauge is created with a single `protocol` label.
///
/// # Errors
///
/// Returns an error as soon as registering any one of the gauges with
/// `registry` fails.
pub fn new(registry: &Registry) -> anyhow::Result<Self> {
    // All storage gauges share the same single label.
    let label_names: &[&str] = &["protocol"];

    // Field initializers are evaluated top to bottom, so the gauges are
    // registered in the order they are listed here.
    Ok(Self {
        content_storage_usage_bytes: register_gauge_vec_with_registry!(
            opts!(
                "trin_content_storage_usage_bytes",
                "sum of size of individual content stored, in bytes"
            ),
            label_names,
            registry
        )?,
        total_storage_usage_bytes: register_gauge_vec_with_registry!(
            opts!(
                "trin_total_storage_usage_bytes",
                "full on-disk database size, in bytes"
            ),
            label_names,
            registry
        )?,
        storage_capacity_bytes: register_gauge_vec_with_registry!(
            opts!(
                "trin_storage_capacity_bytes",
                "user-defined limit on storage usage, in bytes"
            ),
            label_names,
            registry
        )?,
        radius_ratio: register_gauge_vec_with_registry!(
            opts!(
                "trin_radius_ratio",
                "the fraction of the whole data ring covered by the data radius"
            ),
            label_names,
            registry
        )?,
        entry_count: register_int_gauge_vec_with_registry!(
            opts!("trin_entry_count", "total number of storage entries"),
            label_names,
            registry
        )?,
    })
}

/// Sets the content-storage-usage gauge for `protocol` to `bytes`.
pub fn report_content_data_storage_bytes(&self, protocol: &str, bytes: f64) {
    let gauge = self
        .content_storage_usage_bytes
        .with_label_values(&[protocol]);
    gauge.set(bytes);
}

/// Sets the total-storage-usage gauge for `protocol` to `bytes`.
pub fn report_total_storage_usage_bytes(&self, protocol: &str, bytes: f64) {
    let gauge = self
        .total_storage_usage_bytes
        .with_label_values(&[protocol]);
    gauge.set(bytes);
}

/// Sets the storage-capacity gauge for `protocol` to `bytes`.
pub fn report_storage_capacity_bytes(&self, protocol: &str, bytes: f64) {
    let gauge = self.storage_capacity_bytes.with_label_values(&[protocol]);
    gauge.set(bytes);
}

/// Sets the radius-ratio gauge for `protocol` from `radius`.
///
/// Bytes 31 down to 28 of `radius` are read as a big-endian `u32`, which is
/// then divided by `u32::MAX` to give the ratio stored in the gauge.
pub fn report_radius(&self, protocol: &str, radius: Distance) {
    // NOTE(review): presumably byte 31 is the most significant byte of the
    // distance, so this is a truncation to its top 32 bits; confirm against
    // `Distance::byte`.
    let leading_word = u32::from_be_bytes([
        radius.byte(31),
        radius.byte(30),
        radius.byte(29),
        radius.byte(28),
    ]);
    // `u32` converts to `f64` without loss, so `f64::from` is exact.
    let ratio = f64::from(leading_word) / f64::from(u32::MAX);
    self.radius_ratio.with_label_values(&[protocol]).set(ratio);
}

/// Sets the entry-count gauge for `protocol` to `count`.
///
/// # Panics
///
/// Panics if `count` does not fit in an `i64`.
pub fn report_entry_count(&self, protocol: &str, count: u64) {
    // The gauge stores an `i64`, so the unsigned count must be narrowed first.
    let entries =
        i64::try_from(count).expect("Number of db entries will be small enough to fit in i64");
    let gauge = self.entry_count.with_label_values(&[protocol]);
    gauge.set(entries);
}

/// Increments the entry-count gauge for `protocol` by one.
pub fn increase_entry_count(&self, protocol: &str) {
    let gauge = self.entry_count.with_label_values(&[protocol]);
    gauge.inc();
}

/// Decrements the entry-count gauge for `protocol` by one.
pub fn decrease_entry_count(&self, protocol: &str) {
    let gauge = self.entry_count.with_label_values(&[protocol]);
    gauge.dec();
}

pub fn get_summary(&self, protocol: &str) -> String {
let radius_percent = self.radius_ratio.with_label_values(&[protocol]).get() * 100.0;
format!(
"radius={:.*}% content={:.1}/{}mb #={} disk={:.1}mb",
Self::precision_for_percentage(radius_percent),
radius_percent,
self.content_storage_usage_bytes
.with_label_values(&[protocol])
.get()
/ BYTES_IN_MB_F64,
self.storage_capacity_bytes
.with_label_values(&[protocol])
.get()
/ BYTES_IN_MB_F64,
self.entry_count.with_label_values(&[protocol]).get(),
self.total_storage_usage_bytes
.with_label_values(&[protocol])
.get()
/ BYTES_IN_MB_F64,
)
}

/// Chooses how many decimal places to print for a percentage.
///
/// Returns 0 for values of at least 10, 1 for at least 1, 2 for at least 0.1,
/// 3 for at least 0.01, and 4 for anything else (including negative values
/// and NaN, which compare false against every threshold).
pub fn precision_for_percentage(percent: f64) -> usize {
    // Thresholds in descending order: the index of the first one that
    // `percent` reaches is the number of decimal places to use.
    const THRESHOLDS: [f64; 4] = [10.0, 1.0, 0.1, 0.01];

    THRESHOLDS
        .iter()
        .position(|&threshold| percent >= threshold)
        .unwrap_or(THRESHOLDS.len())
}
}
15 changes: 8 additions & 7 deletions portalnet/src/overlay.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ where
store: Arc<RwLock<TStore>>,
protocol: ProtocolId,
validator: Arc<TValidator>,
metrics: OverlayMetrics,
) -> Self {
let kbuckets = Arc::new(RwLock::new(KBucketsTable::new(
discovery.local_enr().node_id().into(),
Expand All @@ -140,9 +141,7 @@ where
config.table_filter,
config.bucket_filter,
)));

// Initialize metrics, keep a reference in order to build metrics summaries for logging
let metrics = Arc::new(OverlayMetrics::new(&protocol));
let metrics = Arc::new(metrics);

let command_tx = OverlayService::<TContentKey, TMetric, TValidator, TStore>::spawn(
Arc::clone(&discovery),
Expand Down Expand Up @@ -482,11 +481,13 @@ where
) -> anyhow::Result<()> {
match self.validator.validate_content(content_key, content).await {
Ok(_) => {
self.metrics.report_validation(true);
self.metrics
.report_validation(&self.protocol.to_string(), true);
Ok(())
}
Err(msg) => {
self.metrics.report_validation(false);
self.metrics
.report_validation(&self.protocol.to_string(), false);
Err(anyhow!(
"Content validation failed for content key {:?} with error: {:?}",
content_key,
Expand Down Expand Up @@ -725,11 +726,11 @@ where
}

pub fn get_message_summary(&self) -> String {
self.metrics.get_message_summary()
self.metrics.get_message_summary(&self.protocol.to_string())
}

pub fn get_utp_summary(&self) -> String {
self.metrics.get_utp_summary()
self.metrics.get_utp_summary(&self.protocol.to_string())
}

/// Creates an event stream channel which can be polled to receive overlay events.
Expand Down
Loading

0 comments on commit 22ac097

Please sign in to comment.