Skip to content

Commit

Permalink
feat(consumption_metrics): remove event deduplication support (#5316)
Browse files Browse the repository at this point in the history
We no longer use pageserver deduplication anywhere. Give out a warning
instead.

Split off from #5297.

Cc: #5175 for dedup.
  • Loading branch information
koivunej authored Sep 15, 2023
1 parent c5d226d commit 00c4c8e
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 30 deletions.
2 changes: 1 addition & 1 deletion pageserver/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ pub mod defaults {
super::ConfigurableSemaphore::DEFAULT_INITIAL.get();

pub const DEFAULT_METRIC_COLLECTION_INTERVAL: &str = "10 min";
pub const DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL: &str = "1 hour";
pub const DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL: &str = "0s";
pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;
pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";
Expand Down
37 changes: 8 additions & 29 deletions pageserver/src/consumption_metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,11 +221,16 @@ type Cache = HashMap<MetricsKey, (EventType, u64)>;
pub async fn collect_metrics(
metric_collection_endpoint: &Url,
metric_collection_interval: Duration,
cached_metric_collection_interval: Duration,
_cached_metric_collection_interval: Duration,
synthetic_size_calculation_interval: Duration,
node_id: NodeId,
ctx: RequestContext,
) -> anyhow::Result<()> {
if _cached_metric_collection_interval != Duration::ZERO {
tracing::warn!(
"cached_metric_collection_interval is no longer used, please set it to zero."
)
}
let mut ticker = tokio::time::interval(metric_collection_interval);
info!("starting collect_metrics");

Expand Down Expand Up @@ -253,7 +258,6 @@ pub async fn collect_metrics(
.build()
.expect("Failed to create http client with timeout");
let mut cached_metrics = HashMap::new();
let mut prev_iteration_time: std::time::Instant = std::time::Instant::now();

loop {
tokio::select! {
Expand All @@ -263,14 +267,7 @@ pub async fn collect_metrics(
},
tick_at = ticker.tick() => {

// send cached metrics every cached_metric_collection_interval
let send_cached = prev_iteration_time.elapsed() >= cached_metric_collection_interval;

if send_cached {
prev_iteration_time = std::time::Instant::now();
}

collect_metrics_iteration(&client, &mut cached_metrics, metric_collection_endpoint, node_id, &ctx, send_cached).await;
collect_metrics_iteration(&client, &mut cached_metrics, metric_collection_endpoint, node_id, &ctx).await;

crate::tenant::tasks::warn_when_period_overrun(
tick_at.elapsed(),
Expand Down Expand Up @@ -298,7 +295,6 @@ async fn collect_metrics_iteration(
metric_collection_endpoint: &reqwest::Url,
node_id: NodeId,
ctx: &RequestContext,
send_cached: bool,
) {
trace!(
"starting collect_metrics_iteration. metric_collection_endpoint: {}",
Expand All @@ -325,24 +321,7 @@ async fn collect_metrics_iteration(
}
});

let mut current_metrics = collect(tenants, cached_metrics, ctx).await;

// Filter metrics, unless we want to send all metrics, including cached ones.
// See: https://github.com/neondatabase/neon/issues/3485
if !send_cached {
current_metrics.retain(|(curr_key, (kind, curr_val))| {
if kind.is_incremental() {
// incremental values (currently only written_size_delta) should not get any cache
// deduplication because they will be used by upstream for "is still alive."
true
} else {
match cached_metrics.get(curr_key) {
Some((_, val)) => val != curr_val,
None => true,
}
}
});
}
let current_metrics = collect(tenants, cached_metrics, ctx).await;

if current_metrics.is_empty() {
trace!("no new metrics to send");
Expand Down

1 comment on commit 00c4c8e

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2552 tests run: 2424 passed, 0 failed, 128 skipped (full report)


Flaky tests (5)

Postgres 16

  • test_partial_evict_tenant: debug
  • test_get_tenant_size_with_multiple_branches: release, debug

Postgres 14

  • test_get_tenant_size_with_multiple_branches: release, debug

Code coverage (full report)

  • functions: 53.0% (7678 of 14491 functions)
  • lines: 80.9% (44871 of 55475 lines)

The comment gets automatically updated with the latest test results
00c4c8e at 2023-09-15T21:52:55.384Z :recycle:

Please sign in to comment.