diff --git a/crates/edgen_core/src/settings.rs b/crates/edgen_core/src/settings.rs index 1924ed5..e781450 100644 --- a/crates/edgen_core/src/settings.rs +++ b/crates/edgen_core/src/settings.rs @@ -184,8 +184,12 @@ pub struct SettingsParams { pub audio_transcriptions_model_repo: String, /// The policy used to decided if models/session should be allocated and run on acceleration - /// hardware + /// hardware. pub gpu_policy: DevicePolicy, + + /// The maximum size, in bytes, any request can have. This is most relevant in requests with files, such as audio + /// transcriptions. + pub max_request_size: usize, } impl SettingsParams { @@ -234,6 +238,7 @@ impl Default for SettingsParams { gpu_policy: DevicePolicy::AlwaysDevice { overflow_to_cpu: true, }, + max_request_size: 1024 * 1024 * 100, // 100 MB } } } diff --git a/crates/edgen_server/src/lib.rs b/crates/edgen_server/src/lib.rs index 1ad53e3..f39ba50 100644 --- a/crates/edgen_server/src/lib.rs +++ b/crates/edgen_server/src/lib.rs @@ -15,18 +15,17 @@ #![deny(unsafe_code)] #![warn(missing_docs)] -use axum::extract::DefaultBodyLimit; use core::future::IntoFuture; use std::process::exit; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; -use tower_http::cors::CorsLayer; - +use axum::extract::DefaultBodyLimit; use futures::executor::block_on; use tokio::select; use tokio::sync::oneshot; use tokio::task::JoinSet; +use tower_http::cors::CorsLayer; use tracing::{error, info}; use utoipa::OpenApi; @@ -159,7 +158,7 @@ async fn start_server(args: &cli::Serve) -> EdgenResult { .await .init() .await - .expect("Failed to initialise settings"); + .expect("Failed to initialise settings. 
Please make sure the configuration file is valid, or reset it via the system tray and restart Edgen.\nThe following error occurred"); settings::create_project_dirs().await.unwrap(); @@ -193,7 +192,9 @@ async fn run_server(args: &cli::Serve) -> Result { let http_app = routes::routes() .layer(CorsLayer::permissive()) - .layer(DefaultBodyLimit::max(1024 * 1024 * 1024)); + .layer(DefaultBodyLimit::max( + SETTINGS.read().await.read().await.max_request_size, + )); let uri_vector = if !args.uri.is_empty() { info!("Overriding default URI"); @@ -302,8 +303,6 @@ async fn run_server(args: &cli::Serve) -> Result { #[cfg(test)] mod tests { - use super::*; - use crate::openai_shim::TranscriptionResponse; use axum::routing::post; use axum::Router; use axum_test::multipart; @@ -311,6 +310,10 @@ mod tests { use levenshtein; use serde_json::from_str; + use crate::openai_shim::TranscriptionResponse; + + use super::*; + fn completion_streaming_request() -> String { r#" { diff --git a/docs/src/app/documentation/configuration/page.mdx b/docs/src/app/documentation/configuration/page.mdx index 76ee97b..3032b14 100644 --- a/docs/src/app/documentation/configuration/page.mdx +++ b/docs/src/app/documentation/configuration/page.mdx @@ -17,6 +17,7 @@ The Edgen configuration. It is read from a file where you can define your models | `audio_transcriptions_model_name` | Name of audio transcriptions model | ggml-distil-small.en.bin | | `audio_transcriptions_model_repo` | HuggingFace repo for audio transcriptions | distil-whisper/distil-small.en | | `gpu_policy` | Policy to choose how a model gets loaded | !always_device | +| `max_request_size` | Maximum size a request can have | 100 Megabytes | ## Configuration Paths for DATA_DIR