proxy: change is cold start to enum (#6948)
## Problem

It is useful to distinguish a genuine cold start from the case where the compute
was taken from the pool, which a single boolean cannot express.

## Summary of changes

Replaced the boolean `is_cold_start` field with a `ColdStartInfo` enum (`Unknown`, `Warm`, `PoolHit`, `PoolMiss`).
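
For context, a minimal self-contained sketch of how the new field round-trips through serde. It mirrors the enum added in `proxy/src/console/messages.rs` below; the extra `PartialEq` derive and the `main` function exist only for the asserts and are not part of this change, and the sketch assumes the `serde` (with `derive`) and `serde_json` crates.

```rust
use serde::{Deserialize, Serialize};

// Mirrors the enum added in proxy/src/console/messages.rs
// (PartialEq added here only so assert_eq! works in this sketch).
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ColdStartInfo {
    Unknown = 0,
    Warm = 1,
    PoolHit = 2,
    PoolMiss = 3,
}

fn main() {
    // The console reports the field as a snake_case string, e.g. "pool_hit".
    let info: ColdStartInfo = serde_json::from_str("\"pool_hit\"").unwrap();
    assert_eq!(info, ColdStartInfo::PoolHit);

    // The parquet writer stores the JSON-encoded form of the value
    // (serde_json::to_string keeps the surrounding quotes).
    let column = serde_json::to_string(&info).unwrap_or_default();
    assert_eq!(column, "\"pool_hit\"");
}
```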
khanova authored Mar 4, 2024
1 parent 8dc7dc7 commit 3114be0
Showing 3 changed files with 55 additions and 42 deletions.
14 changes: 12 additions & 2 deletions proxy/src/console/messages.rs
@@ -1,4 +1,4 @@
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
use std::fmt;

use crate::auth::IpPattern;
@@ -98,7 +98,16 @@ pub struct MetricsAuxInfo {
pub endpoint_id: EndpointId,
pub project_id: ProjectId,
pub branch_id: BranchId,
-pub is_cold_start: Option<bool>,
+pub cold_start_info: Option<ColdStartInfo>,
}

+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "snake_case")]
+pub enum ColdStartInfo {
+Unknown = 0,
+Warm = 1,
+PoolHit = 2,
+PoolMiss = 3,
+}

#[cfg(test)]
@@ -111,6 +120,7 @@ mod tests {
"endpoint_id": "endpoint",
"project_id": "project",
"branch_id": "branch",
"cold_start_info": "unknown",
})
}

8 changes: 4 additions & 4 deletions proxy/src/context.rs
@@ -9,7 +9,7 @@ use tracing::{field::display, info_span, Span};
use uuid::Uuid;

use crate::{
-console::messages::MetricsAuxInfo,
+console::messages::{ColdStartInfo, MetricsAuxInfo},
error::ErrorKind,
metrics::{LatencyTimer, ENDPOINT_ERRORS_BY_KIND, ERROR_BY_KIND},
BranchId, DbName, EndpointId, ProjectId, RoleName,
@@ -42,7 +42,7 @@ pub struct RequestMonitoring {
error_kind: Option<ErrorKind>,
pub(crate) auth_method: Option<AuthMethod>,
success: bool,
-is_cold_start: Option<bool>,
+cold_start_info: Option<ColdStartInfo>,

// extra
// This sender is here to keep the request monitoring channel open while requests are taking place.
@@ -91,7 +91,7 @@ impl RequestMonitoring {
error_kind: None,
auth_method: None,
success: false,
-is_cold_start: None,
+cold_start_info: None,

sender: LOG_CHAN.get().and_then(|tx| tx.upgrade()),
latency_timer: LatencyTimer::new(protocol),
@@ -115,7 +115,7 @@ impl RequestMonitoring {
self.set_endpoint_id(x.endpoint_id);
self.branch = Some(x.branch_id);
self.project = Some(x.project_id);
-self.is_cold_start = x.is_cold_start;
+self.cold_start_info = x.cold_start_info;
}

pub fn set_project_id(&mut self, project_id: ProjectId) {
75 changes: 39 additions & 36 deletions proxy/src/context/parquet.rs
@@ -93,7 +93,7 @@ struct RequestData {
/// Or if we make it to proxy_pass
success: bool,
/// Indicates if the cplane started the new compute node for this request.
-is_cold_start: Option<bool>,
+cold_start_info: Option<String>,
/// Tracks time from session start (HTTP request/libpq TCP handshake)
/// Through to success/failure
duration_us: u64,
@@ -121,7 +121,10 @@ impl From<RequestMonitoring> for RequestData {
region: value.region,
error: value.error_kind.as_ref().map(|e| e.to_metric_label()),
success: value.success,
-is_cold_start: value.is_cold_start,
+cold_start_info: value
+.cold_start_info
+.as_ref()
+.map(|x| serde_json::to_string(x).unwrap_or_default()),
duration_us: SystemTime::from(value.first_packet)
.elapsed()
.unwrap_or_default()
@@ -455,7 +458,7 @@ mod tests {
region: "us-east-1",
error: None,
success: rng.gen(),
-is_cold_start: Some(true),
+cold_start_info: Some("no".into()),
duration_us: rng.gen_range(0..30_000_000),
}
}
@@ -525,16 +528,16 @@ mod tests {
assert_eq!(
file_stats,
[
-(1315032, 3, 6000),
-(1315025, 3, 6000),
-(1315085, 3, 6000),
-(1315042, 3, 6000),
-(1315172, 3, 6000),
-(1315014, 3, 6000),
-(1314806, 3, 6000),
-(1315042, 3, 6000),
-(438563, 1, 2000)
-],
+(1314406, 3, 6000),
+(1314399, 3, 6000),
+(1314459, 3, 6000),
+(1314416, 3, 6000),
+(1314546, 3, 6000),
+(1314388, 3, 6000),
+(1314180, 3, 6000),
+(1314416, 3, 6000),
+(438359, 1, 2000)
+]
);

tmpdir.close().unwrap();
@@ -563,12 +566,12 @@ mod tests {
assert_eq!(
file_stats,
[
-(1220433, 5, 10000),
-(1226583, 5, 10000),
-(1228377, 5, 10000),
-(1227739, 5, 10000),
-(1219017, 5, 10000)
-],
+(1220668, 5, 10000),
+(1226818, 5, 10000),
+(1228612, 5, 10000),
+(1227974, 5, 10000),
+(1219252, 5, 10000)
+]
);

tmpdir.close().unwrap();
@@ -599,12 +602,12 @@ mod tests {
assert_eq!(
file_stats,
[
-(1206080, 5, 10000),
-(1205811, 5, 10000),
-(1206104, 5, 10000),
-(1206092, 5, 10000),
-(1206347, 5, 10000)
-],
+(1206315, 5, 10000),
+(1206046, 5, 10000),
+(1206339, 5, 10000),
+(1206327, 5, 10000),
+(1206582, 5, 10000)
+]
);

tmpdir.close().unwrap();
@@ -628,16 +631,16 @@ mod tests {
assert_eq!(
file_stats,
[
-(1315032, 3, 6000),
-(1315025, 3, 6000),
-(1315085, 3, 6000),
-(1315042, 3, 6000),
-(1315172, 3, 6000),
-(1315014, 3, 6000),
-(1314806, 3, 6000),
-(1315042, 3, 6000),
-(438563, 1, 2000)
-],
+(1314406, 3, 6000),
+(1314399, 3, 6000),
+(1314459, 3, 6000),
+(1314416, 3, 6000),
+(1314546, 3, 6000),
+(1314388, 3, 6000),
+(1314180, 3, 6000),
+(1314416, 3, 6000),
+(438359, 1, 2000)
+]
);

tmpdir.close().unwrap();
@@ -673,7 +676,7 @@ mod tests {
// files are smaller than the size threshold, but they took too long to fill so were flushed early
assert_eq!(
file_stats,
-[(659129, 2, 3001), (658842, 2, 3000), (658638, 2, 2999)],
+[(658837, 2, 3001), (658551, 2, 3000), (658347, 2, 2999)]
);

tmpdir.close().unwrap();

1 comment on commit 3114be0

github-actions bot commented on 3114be0 Mar 4, 2024

2561 tests run: 2424 passed, 2 failed, 135 skipped (full report)


Failures on Postgres 14

  • test_basebackup_with_high_slru_count[github-actions-selfhosted-sequential-10-13-30]: release
  • test_basebackup_with_high_slru_count[github-actions-selfhosted-vectored-10-13-30]: release
# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_basebackup_with_high_slru_count[release-pg14-github-actions-selfhosted-sequential-10-13-30] or test_basebackup_with_high_slru_count[release-pg14-github-actions-selfhosted-vectored-10-13-30]"
Flaky tests (1)

Postgres 14

  • test_basebackup_with_high_slru_count[github-actions-selfhosted-vectored-10-13-30]: release

Code coverage* (full report)

  • functions: 28.7% (6939 of 24181 functions)
  • lines: 47.2% (42567 of 90175 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
3114be0 at 2024-03-04T11:23:30.368Z :recycle:
