Skip to content

Commit

Permalink
metrics: add latency histogram statistics
Browse files Browse the repository at this point in the history
I've added histogram metrics used in cpp-rust-driver.

The snapshot of histogram statistics is taken under concurrency precautions using lock-free histogram features.

I've adjusted the docs book, adding an example of taking the snapshot and accessing its values.
  • Loading branch information
QuerthDP committed Dec 11, 2024
1 parent e1e7201 commit d989a59
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 1 deletion.
15 changes: 15 additions & 0 deletions docs/source/metrics/metrics.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Driver metrics

This feature is available only under the crate feature `metrics`.

During operation the driver collects various metrics.

They can be accessed at any moment using `Session::get_metrics()`
Expand All @@ -11,6 +13,7 @@ They can be accessed at any moment using `Session::get_metrics()`
* Total number of paged queries
* Number of errors during paged queries
* Number of retries
* Latency histogram statistics (min, max, mean, standard deviation, percentiles)

### Example
```rust
Expand All @@ -29,6 +32,18 @@ println!(
"99.9 latency percentile: {}",
metrics.get_latency_percentile_ms(99.9).unwrap()
);

let snapshot = metrics.get_snapshot().unwrap();
println!("Min: {}", snapshot.min);
println!("Max: {}", snapshot.max);
println!("Mean: {}", snapshot.mean);
println!("Standard deviation: {}", snapshot.stddev);
println!("Median: {}", snapshot.median);
println!("75th percentile: {}", snapshot.percentile_75);
println!("90th percentile: {}", snapshot.percentile_90);
println!("95th percentile: {}", snapshot.percentile_95);
println!("99th percentile: {}", snapshot.percentile_99);
println!("99.9th percentile: {}", snapshot.percentile_99_9);
# Ok(())
# }
```
12 changes: 12 additions & 0 deletions examples/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,18 @@ async fn main() -> Result<()> {
metrics.get_latency_percentile_ms(99.9).unwrap()
);

let snapshot = metrics.get_snapshot().unwrap();
println!("Min: {}", snapshot.min);
println!("Max: {}", snapshot.max);
println!("Mean: {}", snapshot.mean);
println!("Standard deviation: {}", snapshot.stddev);
println!("Median: {}", snapshot.median);
println!("75th percentile: {}", snapshot.percentile_75);
println!("90th percentile: {}", snapshot.percentile_90);
println!("95th percentile: {}", snapshot.percentile_95);
println!("99th percentile: {}", snapshot.percentile_99);
println!("99.9th percentile: {}", snapshot.percentile_99_9);

println!("Ok.");

Ok(())
Expand Down
106 changes: 106 additions & 0 deletions scylla/src/transport/histogram/lock_free_histogram.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,23 @@ pub struct Histogram {
config: Config,
}

/// Point-in-time summary of the values recorded in a histogram.
///
/// Holds the minimum, maximum, mean, standard deviation, median and the most
/// commonly used percentiles, all computed at the moment the snapshot is taken.
/// Every field uses the same unit as the values recorded in the histogram.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Snapshot {
    /// Smallest recorded value.
    pub min: u64,
    /// Largest recorded value.
    pub max: u64,
    /// Arithmetic mean of the recorded values.
    pub mean: u64,
    /// Standard deviation of the recorded values.
    pub stddev: u64,
    /// Median (50th percentile) of the recorded values.
    pub median: u64,
    /// 75th percentile of the recorded values.
    pub percentile_75: u64,
    /// 90th percentile of the recorded values.
    pub percentile_90: u64,
    /// 95th percentile of the recorded values.
    pub percentile_95: u64,
    /// 99th percentile of the recorded values.
    pub percentile_99: u64,
    /// 99.9th percentile of the recorded values.
    pub percentile_99_9: u64,
}

impl Histogram {
pub fn new() -> Self {
let grouping_power = 7;
Expand Down Expand Up @@ -109,6 +126,95 @@ impl Histogram {
}
}

/// Returns a closure that computes a [`Snapshot`] from the histogram buckets.
///
/// The closure receives the bucket counters together with the histogram
/// `Config` used to translate a bucket index into its value bounds.
///
/// The histogram only stores per-bucket counts, so every statistic is a bucket
/// bound rather than an exact sample: `min`, `mean`, `stddev`, `median` and the
/// percentiles are derived from bucket lower bounds, while `max` is the upper
/// bound of the highest non-empty bucket.
///
/// # Errors
///
/// The closure returns `Err` when no value has been recorded, because none of
/// the statistics is defined for an empty histogram.
pub fn snapshot() -> impl FnOnce(&[AtomicU64], &Config) -> Result<Snapshot, &'static str> {
    |buckets, config| {
        let total_count = Histogram::get_total_count(buckets);
        if total_count == 0 {
            // Without this guard the mean/variance divisions below would panic.
            // NOTE(review): align the message with the one used by the other
            // histogram operations, if they already report an empty histogram.
            return Err("Histogram is empty");
        }

        // 1-based rank of the sample that sits at quantile `q`.
        let rank_of = |q: f64| (q * total_count as f64).ceil() as u128;
        // Order: median, p75, p90, p95, p99, p99.9.
        let ranks = [
            rank_of(0.5),
            rank_of(0.75),
            rank_of(0.9),
            rank_of(0.95),
            rank_of(0.99),
            rank_of(0.999),
        ];
        let mut quantiles = [0u64; 6];

        let mut min = u64::MAX;
        let mut max: u64 = 0;
        let mut weighted_sum: u128 = 0;
        let mut pref_sum: u128 = 0;

        for (i, bucket) in buckets.iter().enumerate() {
            let count = bucket.load(ORDER_TYPE) as u128;
            if count == 0 {
                continue;
            }

            let lower_bound = config.index_to_lower_bound(i);
            let upper_bound = config.index_to_upper_bound(i);

            min = min.min(lower_bound);
            max = max.max(upper_bound);

            weighted_sum += count * lower_bound as u128;

            // A quantile falls into this bucket when its rank lies in
            // (pref_sum, next_pref_sum].
            let next_pref_sum = pref_sum + count;
            for (value, &rank) in quantiles.iter_mut().zip(ranks.iter()) {
                if pref_sum < rank && next_pref_sum >= rank {
                    *value = lower_bound;
                }
            }

            pref_sum = next_pref_sum;
        }

        let mean = (weighted_sum / total_count) as u64;

        let mut variance_sum: u128 = 0;
        for (i, bucket) in buckets.iter().enumerate() {
            let count = bucket.load(ORDER_TYPE) as u128;
            if count == 0 {
                continue;
            }

            // Buckets below the mean are expected, so take the absolute
            // distance; a plain unsigned subtraction would overflow for them.
            let deviation = u128::from(config.index_to_lower_bound(i).abs_diff(mean));
            variance_sum += count * deviation.pow(2);
        }
        let variance = variance_sum / total_count;
        let stddev = (variance as f64).sqrt() as u64;

        let [median, percentile_75, percentile_90, percentile_95, percentile_99, percentile_99_9] =
            quantiles;

        Ok(Snapshot {
            min,
            max,
            mean,
            stddev,
            median,
            percentile_75,
            percentile_90,
            percentile_95,
            percentile_99,
            percentile_99_9,
        })
    }
}

/// Adds up the counters of all `buckets` and returns the total.
///
/// Every counter is read with `ORDER_TYPE` and widened to `u128` before it is
/// added to the running total.
pub fn get_total_count(buckets: &[AtomicU64]) -> u128 {
    let mut total: u128 = 0;
    for bucket in buckets {
        total += u128::from(bucket.load(ORDER_TYPE));
    }
    total
}
Expand Down
1 change: 1 addition & 0 deletions scylla/src/transport/histogram/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ mod lock_free_histogram;

pub use config::Config;
pub use lock_free_histogram::Histogram;
pub use lock_free_histogram::Snapshot;
10 changes: 9 additions & 1 deletion scylla/src/transport/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::transport::histogram::Histogram;
use crate::transport::histogram::{Histogram, Snapshot};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;

Expand Down Expand Up @@ -97,6 +97,14 @@ impl Metrics {
Ok(result)
}

/// Returns a snapshot of the histogram statistics taken at the moment of the call.
///
/// The returned [`Snapshot`] exposes the fields `min`, `max`, `mean`, `stddev`,
/// `median`, `percentile_75`, `percentile_90`, `percentile_95`, `percentile_99`
/// and `percentile_99_9`.
///
/// # Errors
///
/// Returns a [`MetricsError`] when the histogram operation that computes the
/// snapshot fails.
pub fn get_snapshot(&self) -> Result<Snapshot, MetricsError> {
    // `?` converts the error reported by the histogram into `MetricsError`.
    let snapshot = self.histogram.log_operation(Histogram::snapshot())?;
    Ok(snapshot)
}

/// Returns counter for errors occurred in nonpaged queries
pub fn get_errors_num(&self) -> u64 {
self.errors_num.load(ORDER_TYPE)
Expand Down

0 comments on commit d989a59

Please sign in to comment.