From 595aa2f4dba0eef19a67ce7e0a9088a835632208 Mon Sep 17 00:00:00 2001 From: Brian Martin Date: Fri, 13 Dec 2024 15:06:14 -0800 Subject: [PATCH] add a counter group metric type Adds a new metric type designed to more efficiently represent the per-core metrics by reducing the number of metric entries. --- src/common/bpf/builder.rs | 4 +- src/common/bpf/counters.rs | 8 +- src/common/counters/group.rs | 63 ++++++++++ src/common/counters/mod.rs | 2 + src/exposition/http/mod.rs | 155 ++++++++++++++---------- src/samplers/cpu/linux/frequency/mod.rs | 21 +--- src/samplers/cpu/linux/perf/mod.rs | 21 +--- src/samplers/cpu/linux/stats.rs | 129 +++++++++++++++++--- src/samplers/cpu/linux/usage/mod.rs | 37 ++---- 9 files changed, 285 insertions(+), 155 deletions(-) create mode 100644 src/common/counters/group.rs diff --git a/src/common/bpf/builder.rs b/src/common/bpf/builder.rs index 5fa02a02..1eabd1b5 100644 --- a/src/common/bpf/builder.rs +++ b/src/common/bpf/builder.rs @@ -56,7 +56,7 @@ pub struct Builder> { counters: Vec<(&'static str, Vec<&'static LazyCounter>)>, histograms: Vec<(&'static str, &'static RwLockHistogram)>, maps: Vec<(&'static str, Vec)>, - cpu_counters: Vec<(&'static str, Vec<&'static LazyCounter>, ScopedCounters)>, + cpu_counters: Vec<(&'static str, Vec<&'static LazyCounter>, Vec<&'static CounterGroup>)>, perf_events: Vec<(&'static str, PerfEvent)>, } @@ -280,7 +280,7 @@ where mut self, name: &'static str, totals: Vec<&'static LazyCounter>, - individual: ScopedCounters, + individual: Vec<&'static CounterGroup>, ) -> Self { self.cpu_counters.push((name, totals, individual)); self diff --git a/src/common/bpf/counters.rs b/src/common/bpf/counters.rs index 36f475f3..55cf88e6 100644 --- a/src/common/bpf/counters.rs +++ b/src/common/bpf/counters.rs @@ -128,7 +128,7 @@ impl<'a> Counters<'a> { pub struct CpuCounters<'a> { counter_map: CounterMap<'a>, totals: Vec<&'static LazyCounter>, - individual: ScopedCounters, + individual: Vec<&'static CounterGroup>, values: Vec, } @@ -138,7 +138,7 @@ impl<'a> CpuCounters<'a> { pub fn new( map: &'a Map, totals: Vec<&'static LazyCounter>, - individual: ScopedCounters, + individual: Vec<&'static CounterGroup>, ) -> Self { // we need temporary buffer so we can total up the per-CPU values let values = vec![0; totals.len()]; @@ -167,14 +167,14 @@ impl<'a> CpuCounters<'a> { // iterate through and increment our local value for each cpu counter for cpu in 0..MAX_CPUS { - for idx in 0..self.totals.len() { + for idx in 0..self.individual.len() { let value = counters[idx + cpu * bank_width]; // add this CPU's counter to the combined value for this counter self.values[idx] = self.values[idx].wrapping_add(value); // set this CPU's counter to the new value - let _ = self.individual.set(cpu, idx, value); + let _ = self.individual[idx].set(cpu, value); } } diff --git a/src/common/counters/group.rs b/src/common/counters/group.rs new file mode 100644 index 00000000..87d4caa3 --- /dev/null +++ b/src/common/counters/group.rs @@ -0,0 +1,63 @@ +use metriken::Metric; +use parking_lot::RwLock; +use std::sync::OnceLock; +use thiserror::Error; +use metriken::Value; + +#[derive(Error, Debug, PartialEq)] +pub enum CounterGroupError { + #[error("the index is higher than the counter group size")] + InvalidIndex, +} + +/// A group of counters that's protected by a reader-writer lock. +pub struct CounterGroup { + inner: OnceLock>>, + entries: usize, +} + +impl Metric for CounterGroup { + fn as_any(&self) -> std::option::Option<&(dyn std::any::Any + 'static)> { + Some(self) + } + + fn value(&self) -> std::option::Option> { + Some(Value::Other(self)) + } +} + +impl CounterGroup { + /// Create a new counter group + pub const fn new(entries: usize) -> Self { + Self { + inner: OnceLock::new(), + entries, + } + } + + /// Sets the counter at a given index to the provided value + pub fn set(&self, idx: usize, value: u64) -> Result<(), CounterGroupError> { + if idx >= self.entries { + Err(CounterGroupError::InvalidIndex) + } else { + let mut inner = self.get_or_init().write(); + + inner[idx] = value; + + Ok(()) + } + } + + /// Load the counter values + pub fn load(&self) -> Option> { + self.inner.get().map(|v| v.read().clone()) + } + + pub fn len(&self) -> usize { + self.entries + } + + fn get_or_init(&self) -> &RwLock> { + self.inner.get_or_init(|| vec![0; self.entries].into()) + } +} diff --git a/src/common/counters/mod.rs b/src/common/counters/mod.rs index 5c5e0b79..1f272fb3 100644 --- a/src/common/counters/mod.rs +++ b/src/common/counters/mod.rs @@ -2,7 +2,9 @@ #![allow(unused_imports)] mod dynamic; +mod group; mod scoped; pub use dynamic::{DynamicCounter, DynamicCounterBuilder}; +pub use group::{CounterGroup, CounterGroupError}; pub use scoped::ScopedCounters; diff --git a/src/exposition/http/mod.rs b/src/exposition/http/mod.rs index 98f25ee6..481d4077 100644 --- a/src/exposition/http/mod.rs +++ b/src/exposition/http/mod.rs @@ -1,4 +1,4 @@ -use crate::common::HISTOGRAM_GROUPING_POWER; +use crate::common::{HISTOGRAM_GROUPING_POWER, CounterGroup}; use crate::{Arc, Config, Sampler}; use axum::extract::State; use axum::routing::get; @@ -136,76 +136,94 @@ async fn prometheus(State(state): State>) -> String { data.push(format!( "# TYPE {name} counter\n{name_with_metadata} {value} {timestamp}" )); - continue; } Some(Value::Gauge(value)) => { data.push(format!( "# TYPE {name} gauge\n{name_with_metadata} {value} {timestamp}" )); - continue; } - Some(_) => {} - None => { - continue; - } - } - - let any = match metric.as_any() { - Some(any) => any, - None => { - continue; - } - }; - - if let Some(histogram) = any.downcast_ref::() { - if state.config.prometheus().histograms() { - if let Some(histogram) = histogram.load() { - let current = HISTOGRAM_GROUPING_POWER; - let target = state.config.prometheus().histogram_grouping_power(); - - // downsample the histogram if necessary - let downsampled: Option = if current == target { - // the powers matched, we don't need to downsample - None - } else { - Some(histogram.downsample(target).unwrap()) - }; - - // reassign to either use the downsampled histogram or the original - let histogram = if let Some(histogram) = downsampled.as_ref() { - histogram - } else { - &histogram - }; - - // we need to export a total count (free-running) - let mut count = 0; - // we also need to export a total sum of all observations - // which is also free-running - let mut sum = 0; - - let mut entry = format!("# TYPE {name}_distribution histogram\n"); - for bucket in histogram { - // add this bucket's sum of observations - sum += bucket.count() * bucket.end(); - - // add the count to the aggregate - count += bucket.count(); - - entry += &format!( - "{name}_distribution_bucket{{le=\"{}\"}} {count} {timestamp}\n", - bucket.end() - ); + Some(Value::Other(any)) => { + if let Some(histogram) = any.downcast_ref::() { + if state.config.prometheus().histograms() { + if let Some(histogram) = histogram.load() { + let current = HISTOGRAM_GROUPING_POWER; + let target = state.config.prometheus().histogram_grouping_power(); + + // downsample the histogram if necessary + let downsampled: Option = if current == target { + // the powers matched, we don't need to downsample + None + } else { + Some(histogram.downsample(target).unwrap()) + }; + + // reassign to either use the downsampled histogram or the original + let histogram = if let Some(histogram) = downsampled.as_ref() { + histogram + } else { + &histogram + }; + + // we need to export a total count (free-running) + let mut count = 0; + // we also need to export a total sum of all observations + // which is also free-running + let mut sum = 0; + + let mut entry = format!("# TYPE {name}_distribution histogram\n"); + for bucket in histogram { + // add this bucket's sum of observations + sum += bucket.count() * bucket.end(); + + // add the count to the aggregate + count += bucket.count(); + + entry += &format!( + "{name}_distribution_bucket{{le=\"{}\"}} {count} {timestamp}\n", + bucket.end() + ); + } + + entry += &format!( + "{name}_distribution_bucket{{le=\"+Inf\"}} {count} {timestamp}\n" + ); + entry += &format!("{name}_distribution_count {count} {timestamp}\n"); + entry += &format!("{name}_distribution_sum {sum} {timestamp}"); + + data.push(entry); + } + } + } else if let Some(counters) = any.downcast_ref::() { + if let Some(counters) = counters.load() { + let mut entry = format!("# TYPE {name} counter"); + + let metadata: Vec = metric + .metadata() + .iter() + .map(|(key, value)| format!("{key}=\"{value}\"")) + .collect(); + + let metadata = metadata.join(", "); + + for (id, value) in counters.iter().enumerate() { + if *value == 0 { + continue; + } + + if metadata.is_empty() { + entry += &format!("\n{name}{{id=\"{id}\"}} {value} {timestamp}"); + } else { + entry += &format!( + "\n{name}{{{metadata}, id=\"{id}\"}} {value} {timestamp}" + ); + } + } + + data.push(entry); } - - entry += - &format!("{name}_distribution_bucket{{le=\"+Inf\"}} {count} {timestamp}\n"); - entry += &format!("{name}_distribution_count {count} {timestamp}\n"); - entry += &format!("{name}_distribution_sum {sum} {timestamp}"); - - data.push(entry); } } + _ => {} } } @@ -253,6 +271,19 @@ fn simple_stats(quoted: bool) -> Vec { Some(Value::Gauge(value)) => { data.push(format!("{q}{simple_name}{q}: {value}")); } + Some(Value::Other(any)) => { + if let Some(counters) = any.downcast_ref::() { + if let Some(counters) = counters.load() { + for (id, value) in counters.iter().enumerate() { + if *value == 0 { + continue; + } + + data.push(format!("{q}{simple_name}/{id}{q}: {value}")); + } + } + } + } Some(_) | None => {} } } diff --git a/src/samplers/cpu/linux/frequency/mod.rs b/src/samplers/cpu/linux/frequency/mod.rs index 8a96ccbf..ac3bde74 100644 --- a/src/samplers/cpu/linux/frequency/mod.rs +++ b/src/samplers/cpu/linux/frequency/mod.rs @@ -32,27 +32,8 @@ fn init(config: Arc) -> SamplerResult { return Ok(None); } - let cpus = crate::common::linux::cpus()?; - let totals = vec![&CPU_APERF, &CPU_MPERF, &CPU_TSC]; - - let metrics = ["cpu/aperf", "cpu/mperf", "cpu/tsc"]; - - let mut individual = ScopedCounters::new(); - - for cpu in cpus { - for metric in metrics { - individual.push( - cpu, - DynamicCounterBuilder::new(metric) - .metadata("id", format!("{}", cpu)) - .formatter(cpu_metric_percore_formatter) - .build(), - ); - } - } - - println!("initializing bpf for: {NAME}"); + let individual = vec![&CPU_APERF_PERCORE, &CPU_MPERF_PERCORE, &CPU_TSC_PERCORE]; let bpf = BpfBuilder::new(ModSkelBuilder::default) .perf_event("aperf", PerfEvent::msr(MsrId::APERF)?) diff --git a/src/samplers/cpu/linux/perf/mod.rs b/src/samplers/cpu/linux/perf/mod.rs index 94c61fff..d32c9a77 100644 --- a/src/samplers/cpu/linux/perf/mod.rs +++ b/src/samplers/cpu/linux/perf/mod.rs @@ -30,27 +30,8 @@ fn init(config: Arc) -> SamplerResult { return Ok(None); } - let cpus = crate::common::linux::cpus()?; - let totals = vec![&CPU_CYCLES, &CPU_INSTRUCTIONS]; - - let metrics = ["cpu/cycles", "cpu/instructions"]; - - let mut individual = ScopedCounters::new(); - - for cpu in cpus { - for metric in metrics { - individual.push( - cpu, - DynamicCounterBuilder::new(metric) - .metadata("id", format!("{}", cpu)) - .formatter(cpu_metric_percore_formatter) - .build(), - ); - } - } - - println!("initializing bpf for: {NAME}"); + let individual = vec![&CPU_CYCLES_PERCORE, &CPU_INSTRUCTIONS_PERCORE]; let bpf = BpfBuilder::new(ModSkelBuilder::default) .perf_event("cycles", PerfEvent::cpu_cycles()) diff --git a/src/samplers/cpu/linux/stats.rs b/src/samplers/cpu/linux/stats.rs index 4f71658b..ff525348 100644 --- a/src/samplers/cpu/linux/stats.rs +++ b/src/samplers/cpu/linux/stats.rs @@ -1,14 +1,9 @@ +use crate::common::CounterGroup; use crate::samplers::cpu::stats::*; use metriken::*; -#[metric( - name = "cpu/usage/total", - description = "The amount of CPU time spent waiting for IO to complete", - formatter = cpu_usage_total_formatter, - metadata = { state = "io_wait", unit = "nanoseconds" } -)] -pub static CPU_USAGE_IO_WAIT: LazyCounter = LazyCounter::new(Counter::default); +pub static MAX_CPUS: usize = 1024; #[metric( name = "cpu/usage/total", @@ -50,6 +45,78 @@ pub static CPU_USAGE_GUEST: LazyCounter = LazyCounter::new(Counter::default); )] pub static CPU_USAGE_GUEST_NICE: LazyCounter = LazyCounter::new(Counter::default); +#[metric( + name = "cpu/usage", + description = "The amount of CPU time spent busy", + formatter = cpu_usage_percore_formatter, + metadata = { state = "busy", unit = "nanoseconds" } +)] +pub static CPU_USAGE_PERCORE_BUSY: CounterGroup = CounterGroup::new(MAX_CPUS); + +#[metric( + name = "cpu/usage", + description = "The amount of CPU time spent executing normal tasks is user mode", + formatter = cpu_usage_percore_formatter, + metadata = { state = "user", unit = "nanoseconds" } +)] +pub static CPU_USAGE_PERCORE_USER: CounterGroup = CounterGroup::new(MAX_CPUS); + +#[metric( + name = "cpu/usage", + description = "The amount of CPU time spent executing low priority tasks in user mode", + formatter = cpu_usage_percore_formatter, + metadata = { state = "nice", unit = "nanoseconds" } +)] +pub static CPU_USAGE_PERCORE_NICE: CounterGroup = CounterGroup::new(MAX_CPUS); + +#[metric( + name = "cpu/usage", + description = "The amount of CPU time spent executing tasks in kernel mode", + formatter = cpu_usage_percore_formatter, + metadata = { state = "system", unit = "nanoseconds" } +)] +pub static CPU_USAGE_PERCORE_SYSTEM: CounterGroup = CounterGroup::new(MAX_CPUS); + +#[metric( + name = "cpu/usage", + description = "The amount of CPU time spent servicing softirqs", + formatter = cpu_usage_percore_formatter, + metadata = { state = "softirq", unit = "nanoseconds" } +)] +pub static CPU_USAGE_PERCORE_SOFTIRQ: CounterGroup = CounterGroup::new(MAX_CPUS); + +#[metric( + name = "cpu/usage", + description = "The amount of CPU time spent servicing interrupts", + formatter = cpu_usage_percore_formatter, + metadata = { state = "irq", unit = "nanoseconds" } +)] +pub static CPU_USAGE_PERCORE_IRQ: CounterGroup = CounterGroup::new(MAX_CPUS); + +#[metric( + name = "cpu/usage", + description = "The amount of CPU time stolen by the hypervisor", + formatter = cpu_usage_total_formatter, + metadata = { state = "steal", unit = "nanoseconds" } +)] +pub static CPU_USAGE_PERCORE_STEAL: CounterGroup = CounterGroup::new(MAX_CPUS); + +#[metric( + name = "cpu/usage", + description = "The amount of CPU time spent running a virtual CPU for a guest", + formatter = cpu_usage_percore_formatter, + metadata = { state = "guest", unit = "nanoseconds" } +)] +pub static CPU_USAGE_PERCORE_GUEST: CounterGroup = CounterGroup::new(MAX_CPUS); + +#[metric( + name = "cpu/usage", + description = "The amount of CPU time spent running a virtual CPU for a guest in low priority mode", + formatter = cpu_usage_percore_formatter, + metadata = { state = "guest_nice", unit = "nanoseconds" } +)] +pub static CPU_USAGE_PERCORE_GUEST_NICE: CounterGroup = CounterGroup::new(MAX_CPUS); + #[metric( name = "cpu/cycles/total", description = "The number of elapsed CPU cycles", @@ -73,18 +140,44 @@ pub static CPU_MPERF: LazyCounter = LazyCounter::new(Counter::default); #[metric(name = "cpu/tsc/total")] pub static CPU_TSC: LazyCounter = LazyCounter::new(Counter::default); -pub fn simple_formatter(metric: &MetricEntry, _format: Format) -> String { - metric.name().to_string() -} +#[metric( + name = "cpu/cycles", + description = "The number of elapsed CPU cycles", + formatter = cpu_metric_percore_formatter, + metadata = { unit = "cycles" } +)] +pub static CPU_CYCLES_PERCORE: CounterGroup = CounterGroup::new(MAX_CPUS); + +#[metric( + name = "cpu/instructions", + description = "The number of instructions retired", + formatter = cpu_metric_percore_formatter, + metadata = { unit = "instructions" } +)] +pub static CPU_INSTRUCTIONS_PERCORE: CounterGroup = CounterGroup::new(MAX_CPUS); + +#[metric( + name = "cpu/aperf", + formatter = cpu_metric_percore_formatter +)] +pub static CPU_APERF_PERCORE: CounterGroup = CounterGroup::new(MAX_CPUS); + +#[metric( + name = "cpu/mperf", + formatter = cpu_metric_percore_formatter +)] +pub static CPU_MPERF_PERCORE: CounterGroup = CounterGroup::new(MAX_CPUS); + +#[metric( + name = "cpu/tsc", + formatter = cpu_metric_percore_formatter +)] +pub static CPU_TSC_PERCORE: CounterGroup = CounterGroup::new(MAX_CPUS); pub fn cpu_metric_percore_formatter(metric: &MetricEntry, format: Format) -> String { match format { Format::Simple => { - let id = metric - .metadata() - .get("id") - .expect("no `id` for metric formatter"); - format!("{}/cpu{id}", metric.name()) + format!("{}/cpu", metric.name()) } _ => metric.name().to_string(), } @@ -93,15 +186,11 @@ pub fn cpu_metric_percore_formatter(metric: &MetricEntry, format: Format) -> Str pub fn cpu_usage_percore_formatter(metric: &MetricEntry, format: Format) -> String { match format { Format::Simple => { - let id = metric - .metadata() - .get("id") - .expect("no `id` for metric formatter"); let state = metric .metadata() .get("state") .expect("no `state` for metric formatter"); - format!("{}/{state}/cpu{id}", metric.name()) + format!("{}/{state}/cpu", metric.name()) } _ => metric.name().to_string(), } diff --git a/src/samplers/cpu/linux/usage/mod.rs b/src/samplers/cpu/linux/usage/mod.rs index 4bfaac2c..98bf576d 100644 --- a/src/samplers/cpu/linux/usage/mod.rs +++ b/src/samplers/cpu/linux/usage/mod.rs @@ -33,8 +33,6 @@ fn init(config: Arc) -> SamplerResult { return Ok(None); } - let cpus = crate::common::linux::cpus()?; - let totals = vec![ &CPU_USAGE_BUSY, &CPU_USAGE_USER, @@ -47,33 +45,18 @@ fn init(config: Arc) -> SamplerResult { &CPU_USAGE_GUEST_NICE, ]; - let states = [ - "busy", - "user", - "nice", - "system", - "softirq", - "irq", - "steal", - "guest", - "guest_nice", + let individual = vec![ + &CPU_USAGE_PERCORE_BUSY, + &CPU_USAGE_PERCORE_USER, + &CPU_USAGE_PERCORE_NICE, + &CPU_USAGE_PERCORE_SYSTEM, + &CPU_USAGE_PERCORE_SOFTIRQ, + &CPU_USAGE_PERCORE_IRQ, + &CPU_USAGE_PERCORE_STEAL, + &CPU_USAGE_PERCORE_GUEST, + &CPU_USAGE_PERCORE_GUEST_NICE, ]; - let mut individual = ScopedCounters::new(); - - for cpu in cpus { - for state in states { - individual.push( - cpu, - DynamicCounterBuilder::new("cpu/usage") - .metadata("id", format!("{}", cpu)) - .metadata("state", state) - .formatter(cpu_usage_percore_formatter) - .build(), - ); - } - } - let bpf = BpfBuilder::new(ModSkelBuilder::default) .cpu_counters("counters", totals, individual) .build()?;