diff --git a/.gitignore b/.gitignore
index 2d60ce0..5a172d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,4 +13,5 @@ __pycache__/
 .cache
 
 # due to using vscode
-.vscode/
\ No newline at end of file
+.vscode/
+outputs/
\ No newline at end of file
diff --git a/CICD/analyze.sh b/CICD/analyze.sh
new file mode 100755
index 0000000..1d71f4f
--- /dev/null
+++ b/CICD/analyze.sh
@@ -0,0 +1,4 @@
+python -m pylint src
+python -m pyright src
+python -m black src --check
+python -m isort src --check-only
\ No newline at end of file
diff --git a/config/DevsDoCode-LLama-3-8b-Uncensored.yaml b/config/DevsDoCode-LLama-3-8b-Uncensored.yaml
index 62ec61c..225025e 100644
--- a/config/DevsDoCode-LLama-3-8b-Uncensored.yaml
+++ b/config/DevsDoCode-LLama-3-8b-Uncensored.yaml
@@ -1,17 +1,12 @@
 %YAML 1.1
 ---
-engine: !!str VLLMEngine
-model_name: !!str "DevsDoCode/LLama-3-8b-Uncensored"
+hf_model_name: !!str "Orenguteng/Llama-3-8B-Lexi-Uncensored-GGUF"
 tokenizer_name: "meta-llama/Meta-Llama-3-8B-Instruct"
-host: !!str "127.0.0.1"
-port: !!int 8000
+engine: !!str VLLMEngine
+engine_config:
 
-gui_title: !!str "LLama-3-8b-Uncensored"
-default_max_new_tokens: !!int 1000
-default_temperature: !!float 0.6
-default_top_p: !!float 0.95
-default_repetition_penalty: !!float 1.2
-default_frequency_penalty: !!float 0.0
-default_presence_penalty: !!float 0.0
+defaults:
+  - gui_config: default
+  - uvicorn_config: default
\ No newline at end of file
diff --git a/config/IlyaGusev-saiga_llama3_8b-4bits.yaml b/config/IlyaGusev-saiga_llama3_8b-4bits.yaml
index 388537b..457fd8e 100644
--- a/config/IlyaGusev-saiga_llama3_8b-4bits.yaml
+++ b/config/IlyaGusev-saiga_llama3_8b-4bits.yaml
@@ -1,18 +1,12 @@
 %YAML 1.1
 ---
-engine: !!str HFEngine
-model_name: !!str RichardErkhov/IlyaGusev_-_saiga_llama3_8b-4bits
+hf_model_name: !!str RichardErkhov/IlyaGusev_-_saiga_llama3_8b-4bits
 tokenizer_name: !!str RichardErkhov/IlyaGusev_-_saiga_llama3_8b-4bits
-# filename: !!str "*Q4_K_M.gguf"
-host: !!str "127.0.0.1"
-port: !!int 8000
-gui_title: !!str "LLama-3-8b-Uncensored"
+engine: !!str HFEngine
+engine_config:
 
-default_max_new_tokens: !!int 1000
-default_temperature: !!float 0.6
-default_top_p: !!float 0.95
-default_repetition_penalty: !!float 1.2
-default_frequency_penalty: !!float 0.0
-default_presence_penalty: !!float 0.0
+defaults:
+  - gui_config: default
+  - uvicorn_config: default
diff --git a/config/Llama-3-8B-Lexi-Uncensored.yaml b/config/Llama-3-8B-Lexi-Uncensored.yaml
index 0df61b9..15e754b 100644
--- a/config/Llama-3-8B-Lexi-Uncensored.yaml
+++ b/config/Llama-3-8B-Lexi-Uncensored.yaml
@@ -1,19 +1,14 @@
 %YAML 1.1
 ---
-engine: !!str LlamaCppEngine
-model_name: !!str "Orenguteng/Llama-3-8B-Lexi-Uncensored-GGUF"
+hf_model_name: !!str "Orenguteng/Llama-3-8B-Lexi-Uncensored-GGUF"
 tokenizer_name: !!str "meta-llama/Meta-Llama-3-8B-Instruct"
-filename: !!str "*Q4_K_M.gguf"
-n_gpu_layers: 0
-host: !!str "127.0.0.1"
-port: !!int 8000
-gui_title: !!str "LLama-3-8b-Uncensored-4bit-pruned"
+engine: !!str LlamaCppEngine
+engine_config:
+  filename: !!str "*Q4_K_M.gguf"
+  n_gpu_layers: 0
 
-default_max_new_tokens: !!int 1000
-default_temperature: !!float 0.6
-default_top_p: !!float 0.95
-default_repetition_penalty: !!float 1.2
-default_frequency_penalty: !!float 0.0
-default_presence_penalty: !!float 0.0
+defaults:
+  - gui_config: default
+  - uvicorn_config: default
diff --git a/config/Vikhr.yaml b/config/Vikhr.yaml
index 4a08574..8a05491 100644
--- a/config/Vikhr.yaml
+++ b/config/Vikhr.yaml
@@ -1,18 +1,13 @@
 %YAML 1.1
 ---
-engine: !!str VLLMEngine
-model_name: !!str "Vikhrmodels/Vikhr-7B-instruct_0.2" +hf_model_name: !!str "Vikhrmodels/Vikhr-7B-instruct_0.2" tokenizer_name: "Vikhrmodels/Vikhr-7B-instruct_0.2" -host: !!str "127.0.0.1" -port: !!int 8000 +engine: !!str VLLMEngine +engine_config: -gui_title: !!str "Vikhr-7B-instruct_0.2" -default_max_new_tokens: !!int 1000 -default_temperature: !!float 0.6 -default_top_p: !!float 0.95 -default_repetition_penalty: !!float 1.2 -default_frequency_penalty: !!float 0.0 -default_presence_penalty: !!float 0.0 +defaults: + - gui_config: default + - uvicorn_config: default diff --git a/config/gui_config/default.yaml b/config/gui_config/default.yaml new file mode 100644 index 0000000..e22cbfe --- /dev/null +++ b/config/gui_config/default.yaml @@ -0,0 +1,7 @@ +default_sampling_config: + max_new_tokens: !!int 1000 + temperature: !!float 0.6 + top_p: !!float 0.95 + repetition_penalty: !!float 1.2 + frequency_penalty: !!float 0.0 + presence_penalty: !!float 0.0 diff --git a/config/meta-llama-Meta-Llama-3-8B-Instruct.yaml b/config/meta-llama-Meta-Llama-3-8B-Instruct.yaml index f6ca0d8..01c0146 100644 --- a/config/meta-llama-Meta-Llama-3-8B-Instruct.yaml +++ b/config/meta-llama-Meta-Llama-3-8B-Instruct.yaml @@ -1,16 +1,12 @@ %YAML 1.1 --- -engine: !!str VLLMEngine -model_name: !!str "meta-llama/Meta-Llama-3-8B-Instruct" +hf_model_name: !!str "meta-llama/Meta-Llama-3-8B-Instruct" +tokenizer_name: !!str "meta-llama/Meta-Llama-3-8B-Instruct" -host: !!str "127.0.0.1" -port: !!int 8000 -gui_title: !!str "meta-llama/Meta-Llama-3-8B-Instruct" +engine: !!str VLLMEngine +engine_config: -default_max_new_tokens: !!int 1000 -default_temperature: !!float 0.6 -default_top_p: !!float 0.95 -default_repetition_penalty: !!float 1.2 -default_frequency_penalty: !!float 0.0 -default_presence_penalty: !!float 0.0 +defaults: + - gui_config: default + - uvicorn_config: default diff --git a/config/uvicorn_config/default.yaml b/config/uvicorn_config/default.yaml new file mode 100644 index 0000000..e0769fe --- /dev/null +++ b/config/uvicorn_config/default.yaml @@ -0,0 +1,4 @@ +host: !!str "127.0.0.1" +port: !!int 8000 +log_level: !!str debug +loop: !!str asyncio \ No newline at end of file diff --git a/src/AGISwarm/llm_instruct_ms/__main__.py b/src/AGISwarm/llm_instruct_ms/__main__.py index c452d48..c8478aa 100644 --- a/src/AGISwarm/llm_instruct_ms/__main__.py +++ b/src/AGISwarm/llm_instruct_ms/__main__.py @@ -1,25 +1,30 @@ """Main module for the LLM instruct microservice""" -from argparse import ArgumentParser +import os +from pathlib import Path +import hydra import uvicorn from .app import LLMInstructApp -from .settings import LLMInstructSettings +from .typing import LLMInstructConfig -def main(): +@hydra.main( + config_path=str(Path(os.getcwd()) / "config"), +) +def main(config: LLMInstructConfig): """Main function""" - parser = ArgumentParser() - parser.add_argument("--config", type=str, help="Path to the configuration file") - args = parser.parse_args() - yaml_path = args.config - - settings = LLMInstructSettings.from_yaml(yaml_path) - llm_instruct_app = LLMInstructApp(settings) - uvicorn.run(llm_instruct_app.app, host=settings.host, port=settings.port) + llm_instruct_app = LLMInstructApp(config) + uvicorn.run( + llm_instruct_app.app, + host=config.uvicorn_config.host, + port=config.uvicorn_config.port, + log_level=config.uvicorn_config.log_level, + loop=config.uvicorn_config.loop, + ) if __name__ == "__main__": - main() + main() # pylint: disable=no-value-for-parameter diff --git a/src/AGISwarm/llm_instruct_ms/app/app.py 
diff --git a/src/AGISwarm/llm_instruct_ms/app/app.py b/src/AGISwarm/llm_instruct_ms/app.py
similarity index 79%
rename from src/AGISwarm/llm_instruct_ms/app/app.py
rename to src/AGISwarm/llm_instruct_ms/app.py
index 3c8e946..dac983d 100644
--- a/src/AGISwarm/llm_instruct_ms/app/app.py
+++ b/src/AGISwarm/llm_instruct_ms/app.py
@@ -2,28 +2,38 @@
 
 import uuid
 from pathlib import Path
-from typing import Any, Dict, List
+from typing import Any, Dict, List, cast
 
 from fastapi import APIRouter, FastAPI, WebSocket, WebSocketDisconnect
 from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
 from jinja2 import Environment, FileSystemLoader
-from pydantic_settings import BaseSettings
+from omegaconf import OmegaConf
+from pydantic import BaseModel
 
-from ..llm_engines import EngineProtocol
-from ..settings import ENGINE_MAP, ENGINE_SAMPLING_PARAMS_MAP, LLMInstructSettings
+from .llm_engines import EngineProtocol
+from .typing import (
+    ENGINE_CONFIG_MAP,
+    ENGINE_MAP,
+    ENGINE_SAMPLING_PARAMS_MAP,
+    LLMInstructConfig,
+)
 
 
 class LLMInstructApp:  # pylint: disable=too-few-public-methods
     """Application factory"""
 
-    def __init__(self, settings: LLMInstructSettings):
-        self.settings = settings
+    def __init__(self, config: LLMInstructConfig):
+        self.config = config
         self.app = FastAPI()
-        self.llm: EngineProtocol[Any] = ENGINE_MAP[settings.engine](
-            **settings.engine_settings.model_dump()
+        if config.engine_config is None:
+            config.engine_config = ENGINE_CONFIG_MAP[config.engine]()
+        self.llm: EngineProtocol[Any] = ENGINE_MAP[config.engine](  # type: ignore
+            hf_model_name=config.hf_model_name,
+            tokenizer_name=config.tokenizer_name,
+            **cast(dict, OmegaConf.to_container(config.engine_config)),
        )
-        self.sampling_settings_cls = ENGINE_SAMPLING_PARAMS_MAP[settings.engine]
+        self.sampling_settings_cls = ENGINE_SAMPLING_PARAMS_MAP[config.engine]
         self._configure_routers()
 
     def _configure_routers(self):
@@ -52,7 +62,9 @@ async def get_root():  # type: ignore
             ) as f:
                 f.write(
                     template.render(
-                        **self.settings.model_dump(),
+                        OmegaConf.to_container(
+                            self.config.gui_config.default_sampling_config
+                        ),
                     )
                 )
             return FileResponse(Path(__file__).parent / "gui" / "current_index.html")
@@ -94,6 +106,9 @@ async def generate(websocket: WebSocket):  # type: ignore
                     elif response["response"] == "success":
                         reply += response["msg"]
                         await websocket.send_json(response)
+                    elif response["response"] == "abort":
+                        await websocket.send_json(response)
+                        break
                     else:
                         raise ValueError(
                             f"Invalid response: {response['response']}"
@@ -111,7 +126,7 @@ async def generate(websocket: WebSocket):  # type: ignore
         finally:
             await websocket.close()
 
-    class AbortRequest(BaseSettings):
+    class AbortRequest(BaseModel):
         """Abort request"""
 
         request_id: str
diff --git a/src/AGISwarm/llm_instruct_ms/app/__init__.py b/src/AGISwarm/llm_instruct_ms/app/__init__.py
deleted file mode 100644
index f37c863..0000000
--- a/src/AGISwarm/llm_instruct_ms/app/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""LLM Instruct App Module"""
-
-from .app import LLMInstructApp
diff --git a/src/AGISwarm/llm_instruct_ms/app/gui/current_index.html b/src/AGISwarm/llm_instruct_ms/gui/current_index.html
similarity index 97%
rename from src/AGISwarm/llm_instruct_ms/app/gui/current_index.html
rename to src/AGISwarm/llm_instruct_ms/gui/current_index.html
index 7dde231..7098803 100644
--- a/src/AGISwarm/llm_instruct_ms/app/gui/current_index.html
+++ b/src/AGISwarm/llm_instruct_ms/gui/current_index.html
@@ -1,7 +1,7 @@
-    LLama-3-8b-Uncensored
+    LLM Instruct Interface
@@ -15,7 +15,7 @@
-            LLama-3-8b-Uncensored
+            LLM Instruct Interface
diff --git a/src/AGISwarm/llm_instruct_ms/app/gui/jinja2.html b/src/AGISwarm/llm_instruct_ms/gui/jinja2.html
similarity index 77%
rename from src/AGISwarm/llm_instruct_ms/app/gui/jinja2.html
rename to src/AGISwarm/llm_instruct_ms/gui/jinja2.html
index c09fb5b..dbf3fa5 100644
--- a/src/AGISwarm/llm_instruct_ms/app/gui/jinja2.html
+++ b/src/AGISwarm/llm_instruct_ms/gui/jinja2.html
@@ -1,7 +1,7 @@
-    {{gui_title}}
+    LLM Instruct Interface
@@ -28,7 +28,7 @@
             {{gui_title}}
+            value={{system_prompt}}>
@@ -37,49 +37,49 @@
             {{gui_title}}
+            value="{{max_new_tokens}}">
             Temperature:
+            value="{{temperature}}">
             Top P:
+            value="{{top_p}}">
             Repetition Penalty:
+            value="{{repetition_penalty}}">
             Frequency Penalty:
+            value="{{frequency_penalty}}">
             Presence Penalty:
+            value="{{presence_penalty}}">
diff --git a/src/AGISwarm/llm_instruct_ms/app/gui/scripts.js b/src/AGISwarm/llm_instruct_ms/gui/scripts.js
similarity index 100%
rename from src/AGISwarm/llm_instruct_ms/app/gui/scripts.js
rename to src/AGISwarm/llm_instruct_ms/gui/scripts.js
diff --git a/src/AGISwarm/llm_instruct_ms/app/gui/style.css b/src/AGISwarm/llm_instruct_ms/gui/style.css
similarity index 100%
rename from src/AGISwarm/llm_instruct_ms/app/gui/style.css
rename to src/AGISwarm/llm_instruct_ms/gui/style.css
diff --git a/src/AGISwarm/llm_instruct_ms/llm_engines/hf_engine.py b/src/AGISwarm/llm_instruct_ms/llm_engines/hf_engine.py
index ec94515..1476568 100644
--- a/src/AGISwarm/llm_instruct_ms/llm_engines/hf_engine.py
+++ b/src/AGISwarm/llm_instruct_ms/llm_engines/hf_engine.py
@@ -60,8 +60,8 @@ class HFEngine(EngineProtocol[HFSamplingParams]):  # pylint: disable=invalid-nam
 
     def __init__(
         self,
-        model_name: str = "IlyaGusev/saiga_llama3_8b",
-        tokenizer_name: str = "meta-llama/Meta-Llama-3-8B-Instruct",
+        hf_model_name: str,
+        tokenizer_name: str | None,
     ):
         self.tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_name)
@@ -69,12 +69,12 @@ def __init__(
             transformers.TextGenerationPipeline,
             transformers.pipeline(
                 task="text-generation",
-                model=model_name,
+                model=hf_model_name,
                 device_map="auto",
                 tokenizer=self.tokenizer,
                 model_kwargs={
                     "quantization_config": (
-                        BNB_CONFIG if MODEL_IS_4bit[model_name] else None
+                        BNB_CONFIG if MODEL_IS_4bit[hf_model_name] else None
                     )
                 },
             ),
diff --git a/src/AGISwarm/llm_instruct_ms/llm_engines/llama_cpp_engine.py b/src/AGISwarm/llm_instruct_ms/llm_engines/llama_cpp_engine.py
index 212302a..5945e6e 100644
--- a/src/AGISwarm/llm_instruct_ms/llm_engines/llama_cpp_engine.py
+++ b/src/AGISwarm/llm_instruct_ms/llm_engines/llama_cpp_engine.py
@@ -36,17 +36,17 @@ class LlamaCppEngine(EngineProtocol[LlamaCppSamplingParams]):
 
     def __init__(  # pylint: disable=too-many-arguments
         self,
-        model_name: str,
+        hf_model_name: str,
         tokenizer_name: str | None,
         filename: str,
         n_gpu_layers: int = -1,
         n_ctx: int = 8192,
     ):
         self.llama = Llama.from_pretrained(
-            model_name, filename=filename, n_gpu_layers=n_gpu_layers, n_ctx=n_ctx
+            hf_model_name, filename=filename, n_gpu_layers=n_gpu_layers, n_ctx=n_ctx
         )
         self.tokenizer: object = AutoTokenizer.from_pretrained(
-            tokenizer_name or model_name
+            tokenizer_name or hf_model_name
         )
 
     def get_sampling_params(self, sampling_params: LlamaCppSamplingParams):
diff --git a/src/AGISwarm/llm_instruct_ms/llm_engines/vllm_engine.py b/src/AGISwarm/llm_instruct_ms/llm_engines/vllm_engine.py
index da51e81..9fd1172 100644
--- a/src/AGISwarm/llm_instruct_ms/llm_engines/vllm_engine.py
+++ b/src/AGISwarm/llm_instruct_ms/llm_engines/vllm_engine.py
@@ -21,11 +21,11 @@ class VLLMSamplingParams(SamplingParams):
 class VLLMEngine(EngineProtocol[VLLMSamplingParams]):
     """LLM Instruct Model Inference using VLLM"""
 
-    def __init__(self, model_name: str, tokenizer_name: str | None = None):
+    def __init__(self, hf_model_name: str, tokenizer_name: str | None = None):
         self.model = vllm.AsyncLLMEngine.from_engine_args(
             vllm.AsyncEngineArgs(
-                model=model_name,
-                tokenizer=tokenizer_name or model_name,
+                model=hf_model_name,
+                tokenizer=tokenizer_name or hf_model_name,
                 dtype="float16",
                 tensor_parallel_size=2,
                 gpu_memory_utilization=1.0,
diff --git a/src/AGISwarm/llm_instruct_ms/settings.py b/src/AGISwarm/llm_instruct_ms/settings.py
deleted file mode 100644
index 16f4d8c..0000000
--- a/src/AGISwarm/llm_instruct_ms/settings.py
+++ /dev/null
@@ -1,117 +0,0 @@
-"""Application settings"""
-
-from pathlib import Path
-from typing import Dict, Literal, Type, Union
-
-from pydantic_settings import BaseSettings
-from ruamel.yaml import YAML
-
-from .llm_engines import (
-    HFEngine,
-    HFSamplingParams,
-    LlamaCppEngine,
-    LlamaCppSamplingParams,
-    VLLMEngine,
-    VLLMSamplingParams,
-)
-
-ENGINE_MAP: Dict[str, Type[Union[HFEngine, VLLMEngine, LlamaCppEngine]]] = {
-    "HFEngine": HFEngine,
-    "VLLMEngine": VLLMEngine,
-    "LlamaCppEngine": LlamaCppEngine,
-}
-
-ENGINE_SAMPLING_PARAMS_MAP: Dict[
-    str, Type[Union[HFSamplingParams, VLLMSamplingParams, LlamaCppSamplingParams]]
-] = {
-    "HFEngine": HFSamplingParams,
-    "VLLMEngine": VLLMSamplingParams,
-    "LlamaCppEngine": LlamaCppSamplingParams,
-}
-
-
-class EngineType(BaseSettings):
-    """Engine type"""
-
-    engine: Literal["HFEngine", "VLLMEngine", "LlamaCppEngine"] = "HFEngine"
-
-
-class ModelSettings(BaseSettings):
-    """Model settings"""
-
-    model_name: str = "DevsDoCode/LLama-3-8b-Uncensored"
-    tokenizer_name: str | None = None
-
-
-class VLLMSettings(ModelSettings):
-    """VLLM settings"""
-
-
-class HFSettings(ModelSettings):
-    """HF settings"""
-
-
-class LlamaCppSettings(ModelSettings):
-    """LlamaCpp settings"""
-
-    filename: str = "*F16.gguf"
-    n_gpu_layers: int = -1
-    n_ctx: int = 8192
-
-
-ENGINE_SETTINGS_MAP: Dict[str, Type] = {
-    "HFEngine": HFSettings,
-    "VLLMEngine": VLLMSettings,
-    "LlamaCppEngine": LlamaCppSettings,
-}
-
-
-class GUISettings(BaseSettings):
-    """GUI settings"""
-
-    gui_title: str = "LLM Instruct"
-
-
-class DefaultSamplingSettings(BaseSettings):
-    """Default sampling settings"""
-
-    default_max_new_tokens: int = 1000
-    default_temperature: float = 0.6
-    default_top_p: float = 0.95
-    default_repetition_penalty: float = 1.2
-    default_frequency_penalty: float = 0.0
-    default_presence_penalty: float = 0.0
-
-
-class NetworkingSettings(BaseSettings):
-    """Application settings"""
-
-    host: str = "127.0.0.1"
-    port: int = 8000
-
-
-class LLMInstructSettings(
-    ModelSettings, GUISettings, DefaultSamplingSettings, NetworkingSettings, EngineType
-):
-    """LLM Instruct settings"""
-
-    engine_settings: Union[VLLMSettings, HFSettings, LlamaCppSettings]
-
-    @classmethod
-    def from_yaml(cls, path: Path) -> "LLMInstructSettings":
-        """Create settings from YAML"""
-        with open(path, "r", encoding="utf-8") as file:
-            yaml: dict = YAML(typ="safe", pure=True).load(file)
-        engine_type = EngineType(engine=yaml["engine"])
-        engine_settings = cls.__choose_engine(engine_type).model_dump()
-        for key in engine_settings.keys():
-            engine_settings[key] = yaml.pop(key, engine_settings[key])
-        yaml["engine_settings"] = engine_settings
-        return cls(**yaml)
-
-    @classmethod
-    def __choose_engine(  # pylint: disable=missing-function-docstring
-        cls, engine_type: EngineType
-    ):
-        engine = engine_type.engine
-        return ENGINE_SETTINGS_MAP[engine]()
diff --git a/src/AGISwarm/llm_instruct_ms/typing.py b/src/AGISwarm/llm_instruct_ms/typing.py
new file mode 100644
index 0000000..84277b0
--- /dev/null
+++ b/src/AGISwarm/llm_instruct_ms/typing.py
@@ -0,0 +1,105 @@
+"""Application settings"""
+
+from dataclasses import dataclass
+from typing import Dict, Literal, Optional, Type, Union
+
+from omegaconf import DictConfig
+from uvicorn.config import LoopSetupType
+
+from .llm_engines import (
+    HFEngine,
+    HFSamplingParams,
+    LlamaCppEngine,
+    LlamaCppSamplingParams,
+    VLLMEngine,
+    VLLMSamplingParams,
+)
+
+ENGINE_MAP: Dict[str, Type[Union[HFEngine, VLLMEngine, LlamaCppEngine]]] = {
+    "HFEngine": HFEngine,
+    "VLLMEngine": VLLMEngine,
+    "LlamaCppEngine": LlamaCppEngine,
+}
+
+ENGINE_SAMPLING_PARAMS_MAP: Dict[
+    str, Type[Union[HFSamplingParams, VLLMSamplingParams, LlamaCppSamplingParams]]
+] = {
+    "HFEngine": HFSamplingParams,
+    "VLLMEngine": VLLMSamplingParams,
+    "LlamaCppEngine": LlamaCppSamplingParams,
+}
+
+
+@dataclass
+class ModelConfig(DictConfig):
+    """Model settings"""
+
+
+@dataclass
+class VLLMConfig(ModelConfig):
+    """VLLM settings"""
+
+
+@dataclass
+class HFConfig(ModelConfig):
+    """HF settings"""
+
+
+@dataclass
+class LlamaCppConfig(ModelConfig):
+    """LlamaCpp settings"""
+
+    filename: str = "*F16.gguf"
+    n_gpu_layers: int = -1
+    n_ctx: int = 8192
+
+
+ENGINE_CONFIG_MAP: Dict[str, Type] = {
+    "HFEngine": HFConfig,
+    "VLLMEngine": VLLMConfig,
+    "LlamaCppEngine": LlamaCppConfig,
+}
+
+
+@dataclass
+class SamplingConfig(DictConfig):
+    """Default sampling settings"""
+
+    max_new_tokens: int = 1000
+    temperature: float = 0.6
+    top_p: float = 0.95
+    repetition_penalty: float = 1.2
+    frequency_penalty: float = 0.0
+    presence_penalty: float = 0.0
+
+
+@dataclass
+class UvicornConfig(DictConfig):
+    """
+    A class to hold the configuration for the Uvicorn server.
+    """
+
+    host: str
+    port: int
+    log_level: str
+    loop: LoopSetupType
+
+
+@dataclass
+class GUIConfig(DictConfig):
+    """GUI settings"""
+
+    default_sampling_config: SamplingConfig
+
+
+@dataclass
+class LLMInstructConfig(DictConfig):
+    """LLM Instruct settings"""
+
+    hf_model_name: str
+    tokenizer_name: str | None
+    engine: Literal["HFEngine", "VLLMEngine", "LlamaCppEngine"]
+    engine_config: Optional[Union[HFConfig, VLLMConfig, LlamaCppConfig]]
+    gui_config: GUIConfig
+    uvicorn_config: UvicornConfig
+    sampling_settings: SamplingConfig
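
Usage sketch: with this layout, Hydra composes one of the per-model YAMLs under config/ with config/gui_config/default.yaml and config/uvicorn_config/default.yaml through each file's "defaults" list, then hands the resulting LLMInstructConfig to LLMInstructApp. Because the hydra.main decorator in __main__.py sets only config_path, the config file must be named on the command line; a plausible invocation from the repository root (the exact flag usage is an assumption, not part of the diff):

    # selects config/Llama-3-8B-Lexi-Uncensored.yaml and starts the uvicorn server
    python -m AGISwarm.llm_instruct_ms --config-name Llama-3-8B-Lexi-Uncensored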