Skip to content

Commit

Permalink
proxy: Use common error interface for error handling with cplane (#9454)
Browse files Browse the repository at this point in the history
- Remove obsolete error handles.
- Use one source of truth for cplane errors.
#18468
  • Loading branch information
awarus authored Oct 21, 2024
1 parent ababa50 commit 2dcac94
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 104 deletions.
6 changes: 5 additions & 1 deletion proxy/src/control_plane/messages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,9 @@ pub(crate) enum Reason {
/// LockAlreadyTaken indicates that the we attempted to take a lock that was already taken.
#[serde(rename = "LOCK_ALREADY_TAKEN")]
LockAlreadyTaken,
/// ActiveEndpointsLimitExceeded indicates that the limit of concurrently active endpoints was exceeded.
#[serde(rename = "ACTIVE_ENDPOINTS_LIMIT_EXCEEDED")]
ActiveEndpointsLimitExceeded,
#[default]
#[serde(other)]
Unknown,
Expand Down Expand Up @@ -194,7 +197,8 @@ impl Reason {
| Reason::ComputeTimeQuotaExceeded
| Reason::WrittenDataQuotaExceeded
| Reason::DataTransferQuotaExceeded
| Reason::LogicalSizeQuotaExceeded => false,
| Reason::LogicalSizeQuotaExceeded
| Reason::ActiveEndpointsLimitExceeded => false,
// transitive error. control plane is currently busy
// but might be ready soon
Reason::RunningOperations
Expand Down
32 changes: 2 additions & 30 deletions proxy/src/control_plane/provider/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,36 +87,8 @@ pub(crate) mod errors {
Reason::ConcurrencyLimitReached => ErrorKind::ControlPlane,
Reason::LockAlreadyTaken => ErrorKind::ControlPlane,
Reason::RunningOperations => ErrorKind::ControlPlane,
Reason::Unknown => match &**e {
ControlPlaneError {
http_status_code:
http::StatusCode::NOT_FOUND | http::StatusCode::NOT_ACCEPTABLE,
..
} => crate::error::ErrorKind::User,
ControlPlaneError {
http_status_code: http::StatusCode::UNPROCESSABLE_ENTITY,
error,
..
} if error
.contains("compute time quota of non-primary branches is exceeded") =>
{
crate::error::ErrorKind::Quota
}
ControlPlaneError {
http_status_code: http::StatusCode::LOCKED,
error,
..
} if error.contains("quota exceeded")
|| error.contains("the limit for current plan reached") =>
{
crate::error::ErrorKind::Quota
}
ControlPlaneError {
http_status_code: http::StatusCode::TOO_MANY_REQUESTS,
..
} => crate::error::ErrorKind::ServiceRateLimit,
ControlPlaneError { .. } => crate::error::ErrorKind::ControlPlane,
},
Reason::ActiveEndpointsLimitExceeded => ErrorKind::ControlPlane,
Reason::Unknown => ErrorKind::ControlPlane,
},
ApiError::Transport(_) => crate::error::ErrorKind::ControlPlane,
}
Expand Down
16 changes: 2 additions & 14 deletions proxy/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use metrics::{CounterPairAssoc, CounterPairVec, HyperLogLog, HyperLogLogVec};
use tokio::time::{self, Instant};

use crate::control_plane::messages::ColdStartInfo;
use crate::error::ErrorKind;

#[derive(MetricGroup)]
#[metric(new(thread_pool: Arc<ThreadPoolMetrics>))]
Expand Down Expand Up @@ -325,23 +326,10 @@ pub enum ConnectionFailureKind {
ComputeUncached,
}

#[derive(FixedCardinalityLabel, Copy, Clone)]
#[label(singleton = "kind")]
pub enum WakeupFailureKind {
BadComputeAddress,
ApiTransportError,
QuotaExceeded,
ApiConsoleLocked,
ApiConsoleBadRequest,
ApiConsoleOtherServerError,
ApiConsoleOtherError,
TimeoutError,
}

#[derive(LabelGroup)]
#[label(set = ConnectionFailuresBreakdownSet)]
pub struct ConnectionFailuresBreakdownGroup {
pub kind: WakeupFailureKind,
pub kind: ErrorKind,
pub retry: Bool,
}

Expand Down
62 changes: 3 additions & 59 deletions proxy/src/proxy/wake_compute.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
use hyper::StatusCode;
use tracing::{error, info, warn};

use super::connect_compute::ComputeConnectBackend;
use crate::config::RetryConfig;
use crate::context::RequestMonitoring;
use crate::control_plane::errors::WakeComputeError;
use crate::control_plane::messages::{ControlPlaneError, Reason};
use crate::control_plane::provider::CachedNodeInfo;
use crate::error::ReportableError;
use crate::metrics::{
ConnectOutcome, ConnectionFailuresBreakdownGroup, Metrics, RetriesMetricGroup, RetryType,
WakeupFailureKind,
};
use crate::proxy::retry::{retry_after, should_retry};

Expand Down Expand Up @@ -60,62 +58,8 @@ pub(crate) async fn wake_compute<B: ComputeConnectBackend>(
}

fn report_error(e: &WakeComputeError, retry: bool) {
use crate::control_plane::errors::ApiError;
let kind = match e {
WakeComputeError::BadComputeAddress(_) => WakeupFailureKind::BadComputeAddress,
WakeComputeError::ApiError(ApiError::Transport(_)) => WakeupFailureKind::ApiTransportError,
WakeComputeError::ApiError(ApiError::ControlPlane(e)) => match e.get_reason() {
Reason::RoleProtected => WakeupFailureKind::ApiConsoleBadRequest,
Reason::ResourceNotFound => WakeupFailureKind::ApiConsoleBadRequest,
Reason::ProjectNotFound => WakeupFailureKind::ApiConsoleBadRequest,
Reason::EndpointNotFound => WakeupFailureKind::ApiConsoleBadRequest,
Reason::BranchNotFound => WakeupFailureKind::ApiConsoleBadRequest,
Reason::RateLimitExceeded => WakeupFailureKind::ApiConsoleLocked,
Reason::NonDefaultBranchComputeTimeExceeded => WakeupFailureKind::QuotaExceeded,
Reason::ActiveTimeQuotaExceeded => WakeupFailureKind::QuotaExceeded,
Reason::ComputeTimeQuotaExceeded => WakeupFailureKind::QuotaExceeded,
Reason::WrittenDataQuotaExceeded => WakeupFailureKind::QuotaExceeded,
Reason::DataTransferQuotaExceeded => WakeupFailureKind::QuotaExceeded,
Reason::LogicalSizeQuotaExceeded => WakeupFailureKind::QuotaExceeded,
Reason::ConcurrencyLimitReached => WakeupFailureKind::ApiConsoleLocked,
Reason::LockAlreadyTaken => WakeupFailureKind::ApiConsoleLocked,
Reason::RunningOperations => WakeupFailureKind::ApiConsoleLocked,
Reason::Unknown => match **e {
ControlPlaneError {
http_status_code: StatusCode::LOCKED,
ref error,
..
} if error.contains("written data quota exceeded")
|| error.contains("the limit for current plan reached") =>
{
WakeupFailureKind::QuotaExceeded
}
ControlPlaneError {
http_status_code: StatusCode::UNPROCESSABLE_ENTITY,
ref error,
..
} if error.contains("compute time quota of non-primary branches is exceeded") => {
WakeupFailureKind::QuotaExceeded
}
ControlPlaneError {
http_status_code: StatusCode::LOCKED,
..
} => WakeupFailureKind::ApiConsoleLocked,
ControlPlaneError {
http_status_code: StatusCode::BAD_REQUEST,
..
} => WakeupFailureKind::ApiConsoleBadRequest,
ControlPlaneError {
http_status_code, ..
} if http_status_code.is_server_error() => {
WakeupFailureKind::ApiConsoleOtherServerError
}
ControlPlaneError { .. } => WakeupFailureKind::ApiConsoleOtherError,
},
},
WakeComputeError::TooManyConnections => WakeupFailureKind::ApiConsoleLocked,
WakeComputeError::TooManyConnectionAttempts(_) => WakeupFailureKind::TimeoutError,
};
let kind = e.get_error_kind();

Metrics::get()
.proxy
.connection_failures_breakdown
Expand Down

1 comment on commit 2dcac94

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

5317 tests run: 5093 passed, 0 failed, 224 skipped (full report)


Code coverage* (full report)

  • functions: 31.2% (7570 of 24246 functions)
  • lines: 48.8% (59930 of 122697 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
2dcac94 at 2024-10-21T15:44:18.859Z :recycle:

Please sign in to comment.