diff --git a/README.md b/README.md index 67c5f6ec..f1e338fd 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,36 @@ will check an Edge process running on http://localhost:3063. If you're using bas If you're hosting Edge with a self-signed certificate using the tls cli arguments, you should use the `--ca-certificate-file ` flag (or the CA_CERTIFICATE_FILE environment variable) to allow the health checker to trust the self signed certificate. +### Built-in Ready check +There is now (from 12.0.0) a subcommand named `ready` which will ping your ready endpoint and exit with status 0 provided the ready endpoint returns 200 OK and `{ status: "READY" }`. Otherwise it will return status 1 and an error message to signal that Edge is not ready (it has not spoken to upstream or recovered from a persisted backup). + +Examples: +* Edge not running: +```shell +$ ./unleash-edge ready +Error: Failed to connect to ready endpoint at http://localhost:3063/internal-backstage/ready. Failed with status None +$ echo $? +1 +``` + +* Edge running but it has not populated its feature cache yet (it has not spoken to upstream or restored from backup): +```shell +$ ./unleash-edge ready +Error: Ready check returned a different status than READY. It returned EdgeStatus { status: NotReady } +$ echo $? +1 +``` +* Edge running and synchronized, i.e. READY: +```shell +$ ./unleash-edge ready +OK +$ echo $? +0 +``` + +If you're hosting Edge with a self-signed certificate using the TLS CLI arguments, you should use the `--ca-certificate-file <file>` flag (or the `CA_CERTIFICATE_FILE` environment variable) to allow the ready checker to trust the self-signed certificate. + + ## Getting Unleash Edge Unleash Edge is distributed as a binary and as a docker image. 
diff --git a/server/src/builder.rs b/server/src/builder.rs index fc59698d..f9f26b54 100644 --- a/server/src/builder.rs +++ b/server/src/builder.rs @@ -201,6 +201,6 @@ pub async fn build_caches_and_refreshers(args: CliArgs) -> EdgeResult build_offline(offline_args).map(|cache| (cache, None, None, None)) } EdgeMode::Edge(edge_args) => build_edge(&edge_args).await, - EdgeMode::Health(_) => unreachable!("Trying to build caches for health check"), + EdgeMode::Health(_) | EdgeMode::Ready(_) => unreachable!("Building caches in check mode"), } } diff --git a/server/src/cli.rs b/server/src/cli.rs index d0eea283..c53302f1 100644 --- a/server/src/cli.rs +++ b/server/src/cli.rs @@ -16,6 +16,8 @@ pub enum EdgeMode { Offline(OfflineArgs), /// Perform a health check against a running edge instance Health(HealthCheckArgs), + /// Perform a ready check against a running edge instance + Ready(ReadyCheckArgs), } #[derive(ValueEnum, Debug, Clone)] @@ -205,6 +207,17 @@ pub struct HealthCheckArgs { pub ca_certificate_file: Option, } +#[derive(Args, Debug, Clone)] +pub struct ReadyCheckArgs { + /// Where the instance you want to ready check is running + #[clap(short, long, env, default_value = "http://localhost:3063")] + pub edge_url: String, + + /// If you're hosting Edge using a self-signed TLS certificate use this to tell the ready check about your CA + #[clap(short, long, env)] + pub ca_certificate_file: Option, +} + #[derive(Parser, Debug, Clone)] pub struct CliArgs { #[clap(flatten)] @@ -348,8 +361,7 @@ mod tests { assert_eq!(api_key.0, "X-Api-Key"); assert_eq!(api_key.1, "mysecret") } - EdgeMode::Offline(_) => unreachable!(), - EdgeMode::Health(_) => unreachable!(), + _ => unreachable!(), } } @@ -373,8 +385,7 @@ mod tests { assert_eq!(api_key.0, "X-Api-Key"); assert_eq!(api_key.1, "mysecret") } - EdgeMode::Offline(_) => unreachable!(), - EdgeMode::Health(_) => unreachable!(), + _ => unreachable!(), } } @@ -395,8 +406,7 @@ mod tests { assert_eq!(auth.0, "Authorization"); assert_eq!(auth.1, "test:test.secret"); } - EdgeMode::Offline(_) => unreachable!(), 
- EdgeMode::Health(_) => unreachable!(), + _ => unreachable!(), } } diff --git a/server/src/error.rs b/server/src/error.rs index 3b4871ec..a00e16d1 100644 --- a/server/src/error.rs +++ b/server/src/error.rs @@ -106,6 +106,7 @@ pub enum EdgeError { JsonParseError(String), NoFeaturesFile, NoTokenProvider, + ReadyCheckError(String), TlsError, TokenParseError(String), ContextParseError, @@ -177,6 +178,9 @@ impl Display for EdgeError { EdgeError::HealthCheckError(message) => { write!(f, "{message}") } + EdgeError::ReadyCheckError(message) => { + write!(f, "{message}") + } EdgeError::TokenValidationError(status_code) => { write!( f, @@ -216,6 +220,7 @@ impl ResponseError for EdgeError { EdgeError::ServiceAccountTokenNotEnabled => StatusCode::NETWORK_AUTHENTICATION_REQUIRED, EdgeError::EdgeMetricsRequestError(status_code) => *status_code, EdgeError::HealthCheckError(_) => StatusCode::INTERNAL_SERVER_ERROR, + EdgeError::ReadyCheckError(_) => StatusCode::INTERNAL_SERVER_ERROR, } } diff --git a/server/src/internal_backstage.rs b/server/src/internal_backstage.rs index eb297266..a9c3b918 100644 --- a/server/src/internal_backstage.rs +++ b/server/src/internal_backstage.rs @@ -1,6 +1,7 @@ use crate::auth::token_validator::TokenValidator; use crate::http::feature_refresher::FeatureRefresher; use crate::metrics::actix_web_metrics::PrometheusMetricsHandler; +use crate::types::Status; use crate::types::{BuildInfo, EdgeJsonResult, EdgeToken, TokenInfo, TokenRefresh}; use actix_web::{ get, @@ -11,25 +12,22 @@ use serde::{Deserialize, Serialize}; use unleash_types::client_features::ClientFeatures; #[derive(Debug, Serialize, Deserialize)] pub struct EdgeStatus { - status: String, + pub status: Status, } impl EdgeStatus { pub fn ok() -> Self { - EdgeStatus { - status: "OK".into(), - } + EdgeStatus { status: Status::Ok } } - pub fn not_ready() -> Self { EdgeStatus { - status: "NOT_READY".into(), + status: Status::NotReady, } } pub fn ready() -> Self { EdgeStatus { - status: 
"READY".into(), + status: Status::Ready, } } } @@ -104,7 +102,7 @@ mod tests { use crate::middleware; use crate::tests::upstream_server; use crate::tokens::cache_key; - use crate::types::{BuildInfo, EdgeToken, TokenInfo, TokenType, TokenValidationStatus}; + use crate::types::{BuildInfo, EdgeToken, Status, TokenInfo, TokenType, TokenValidationStatus}; use actix_web::body::MessageBody; use actix_web::http::header::ContentType; use actix_web::test; @@ -161,7 +159,7 @@ mod tests { let resp = test::call_service(&app, req).await; assert!(resp.status().is_success()); let status: EdgeStatus = test::read_body_json(resp).await; - assert_eq!(status.status, "NOT_READY"); + assert_eq!(status.status, Status::NotReady); } #[actix_web::test] @@ -194,7 +192,7 @@ mod tests { let resp = test::call_service(&app, req).await; assert!(resp.status().is_success()); let status: EdgeStatus = test::read_body_json(resp).await; - assert_eq!(status.status, "READY"); + assert_eq!(status.status, Status::Ready); } #[actix_web::test] diff --git a/server/src/lib.rs b/server/src/lib.rs index a951868e..042dec2f 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -21,6 +21,8 @@ pub mod openapi; pub mod persistence; #[cfg(not(tarpaulin_include))] pub mod prom_metrics; + +pub mod ready_checker; #[cfg(not(tarpaulin_include))] pub mod tls; pub mod tokens; diff --git a/server/src/main.rs b/server/src/main.rs index 71c59934..91009e1a 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -19,7 +19,9 @@ use unleash_edge::middleware::request_tracing::RequestTracing; use unleash_edge::offline::offline_hotload; use unleash_edge::persistence::{persist_data, EdgePersistence}; use unleash_edge::types::{EdgeToken, TokenRefresh, TokenValidationStatus}; -use unleash_edge::{admin_api, cli, client_api, frontend_api, health_checker, openapi}; +use unleash_edge::{ + admin_api, cli, client_api, frontend_api, health_checker, openapi, ready_checker, +}; use unleash_edge::{edge_api, prom_metrics}; use 
unleash_edge::{internal_backstage, tls}; @@ -38,6 +40,9 @@ async fn main() -> Result<(), anyhow::Error> { .await .map_err(|e| e.into()); }; + if let EdgeMode::Ready(args) = args.mode { + return ready_checker::check_ready(args).await.map_err(|e| e.into()); + } let schedule_args = args.clone(); let mode_arg = args.clone().mode; let http_args = args.clone().http; diff --git a/server/src/ready_checker.rs b/server/src/ready_checker.rs new file mode 100644 index 00000000..6e91fc60 --- /dev/null +++ b/server/src/ready_checker.rs @@ -0,0 +1,159 @@ +use crate::cli::ReadyCheckArgs; +use crate::error::EdgeError; +use crate::internal_backstage::EdgeStatus; +use crate::tls::build_upstream_certificate; +use crate::types::Status; +use reqwest::{ClientBuilder, Url}; + +fn build_ready_url(url: &Url) -> Url { + let mut with_path = url.clone(); + with_path + .path_segments_mut() + .expect("Could not build ready url") + .push("internal-backstage") + .push("ready"); + with_path +} + +pub async fn check_ready(ready_check_args: ReadyCheckArgs) -> Result<(), EdgeError> { + let client = match build_upstream_certificate(ready_check_args.ca_certificate_file)? { + Some(cert) => ClientBuilder::new() + .add_root_certificate(cert) + .build() + .expect("Failed to build ready check client"), + None => reqwest::Client::default(), + }; + let base_url = Url::parse(&ready_check_args.edge_url) + .map_err(|p| EdgeError::ReadyCheckError(format!("Invalid ready check url: {p:?}")))?; + let ready_check_url = build_ready_url(&base_url); + let r = client + .get(ready_check_url.clone()) + .send() + .await + .map_err(|e| { + EdgeError::ReadyCheckError(format!( + "Failed to connect to ready endpoint at {}. Failed with status {:?}", + ready_check_url, + e.status() + )) + })?; + if r.status() == 200 { + let ready_check_result: EdgeStatus = r.json().await.map_err(|e| { + EdgeError::ReadyCheckError(format!( + "Ready check endpoint returned data we didn't understand. 
{e:?}" + )) + })?; + match ready_check_result.status { + Status::Ready => { + println!("OK"); + Ok(()) + } + _ => Err(EdgeError::ReadyCheckError(format!( + "Ready check returned a different status than READY. It returned {:?}", + ready_check_result + ))), + } + } else { + Err(EdgeError::ReadyCheckError(format!( + "Ready check did not return 200 for {}. It returned {}", + ready_check_url, + r.status() + ))) + } +} + +#[cfg(test)] +mod tests { + use crate::cli::ReadyCheckArgs; + use crate::internal_backstage::ready; + use crate::ready_checker::check_ready; + use actix_http::HttpService; + use actix_http_test::test_server; + use actix_service::map_config; + use actix_web::dev::AppConfig; + use actix_web::{web, App, HttpResponse}; + use dashmap::DashMap; + use std::sync::Arc; + use unleash_types::client_features::{ClientFeature, ClientFeatures}; + + #[tokio::test] + pub async fn runs_ready_check() { + let features = ClientFeatures { + features: vec![ClientFeature { + name: "test".to_string(), + ..ClientFeature::default() + }], + query: None, + segments: None, + version: 2, + }; + let client_features: DashMap = DashMap::default(); + client_features.insert( + "testproject:testenvironment.testtoken".into(), + features.clone(), + ); + let client_features_arc = Arc::new(client_features); + let srv = test_server(move || { + HttpService::new(map_config( + App::new() + .app_data(web::Data::from(client_features_arc.clone())) + .service(web::scope("/internal-backstage").service(ready)), + |_| AppConfig::default(), + )) + .tcp() + }) + .await; + let url = srv.url("/"); + let check_result = check_ready(ReadyCheckArgs { + ca_certificate_file: None, + edge_url: url, + }) + .await; + assert!(check_result.is_ok()); + } + + #[tokio::test] + pub async fn errors_if_ready_check_fails() { + let check_result = check_ready(ReadyCheckArgs { + ca_certificate_file: None, + edge_url: "http://bogusurl".into(), + }) + .await; + assert!(check_result.is_err()); + } + + async fn conflict() -> 
HttpResponse { + HttpResponse::Conflict().finish() + } + + #[tokio::test] + pub async fn errors_if_ready_check_returns_different_status_than_200() { + let srv = test_server(move || { + HttpService::new(map_config( + App::new().service( + web::scope("/internal-backstage").route("/ready", web::get().to(conflict)), + ), + |_| AppConfig::default(), + )) + .tcp() + }) + .await; + let url = srv.url("/"); + let check_result = check_ready(ReadyCheckArgs { + ca_certificate_file: None, + edge_url: url, + }) + .await; + assert!(check_result.is_err()); + } + + #[tokio::test] + pub async fn fails_if_given_an_invalid_url() { + let check_result = check_ready(ReadyCheckArgs { + ca_certificate_file: None, + edge_url: ":\\///\\/".into(), + }) + .await; + assert!(check_result.is_err()); + } +} diff --git a/server/src/tokens.rs b/server/src/tokens.rs index 8d9529e4..2d87bc8c 100644 --- a/server/src/tokens.rs +++ b/server/src/tokens.rs @@ -126,9 +126,9 @@ impl FromRequest for EdgeToken { Some(v) => EdgeToken::try_from(v.clone()), None => Err(EdgeError::AuthorizationDenied), }, - EdgeMode::Health(_) => { - unreachable!("Trying to get token when running in healthcheck mode") - } + EdgeMode::Health(_) | EdgeMode::Ready(_) => { + unreachable!("Trying to get token in health or ready check mode") + } }; ready(key) } else { diff --git a/server/src/types.rs b/server/src/types.rs index 416c699a..19004f85 100644 --- a/server/src/types.rs +++ b/server/src/types.rs @@ -48,6 +48,15 @@ pub enum TokenValidationStatus { Validated, } +/// Status carried by `EdgeStatus`; serialized in SCREAMING_SNAKE_CASE (`OK`, `NOT_OK`, `NOT_READY`, `READY`). +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +pub enum Status { + Ok, + NotOk, + NotReady, + Ready, +} #[derive(Clone, Debug)] pub struct ClientFeaturesRequest { pub api_key: String,