diff --git a/.gitignore b/.gitignore index 2d60ce0..5a172d0 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,5 @@ __pycache__/ .cache # due to using vscode -.vscode/ \ No newline at end of file +.vscode/ +outputs/ \ No newline at end of file diff --git a/CICD/analyze.sh b/CICD/analyze.sh new file mode 100755 index 0000000..1d71f4f --- /dev/null +++ b/CICD/analyze.sh @@ -0,0 +1,4 @@ +python -m pylint src +python -m pyright src +python -m black src --check +python -m isort src --check-only \ No newline at end of file diff --git a/config/DevsDoCode-LLama-3-8b-Uncensored.yaml b/config/DevsDoCode-LLama-3-8b-Uncensored.yaml index 62ec61c..225025e 100644 --- a/config/DevsDoCode-LLama-3-8b-Uncensored.yaml +++ b/config/DevsDoCode-LLama-3-8b-Uncensored.yaml @@ -1,17 +1,12 @@ %YAML 1.1 --- -engine: !!str VLLMEngine -model_name: !!str "DevsDoCode/LLama-3-8b-Uncensored" +hf_model_name: !!str "Orenguteng/Llama-3-8B-Lexi-Uncensored-GGUF" tokenizer_name: "meta-llama/Meta-Llama-3-8B-Instruct" -host: !!str "127.0.0.1" -port: !!int 8000 +engine: !!str VLLMEngine +engine_config: -gui_title: !!str "LLama-3-8b-Uncensored" -default_max_new_tokens: !!int 1000 -default_temperature: !!float 0.6 -default_top_p: !!float 0.95 -default_repetition_penalty: !!float 1.2 -default_frequency_penalty: !!float 0.0 -default_presence_penalty: !!float 0.0 +defaults: + - gui_config: default + - uvicorn_config: default \ No newline at end of file diff --git a/config/IlyaGusev-saiga_llama3_8b-4bits.yaml b/config/IlyaGusev-saiga_llama3_8b-4bits.yaml index 388537b..457fd8e 100644 --- a/config/IlyaGusev-saiga_llama3_8b-4bits.yaml +++ b/config/IlyaGusev-saiga_llama3_8b-4bits.yaml @@ -1,18 +1,12 @@ %YAML 1.1 --- -engine: !!str HFEngine -model_name: !!str RichardErkhov/IlyaGusev_-_saiga_llama3_8b-4bits +hf_model_name: !!str RichardErkhov/IlyaGusev_-_saiga_llama3_8b-4bits tokenizer_name: !!str RichardErkhov/IlyaGusev_-_saiga_llama3_8b-4bits -# filename: !!str "*Q4_K_M.gguf" -host: !!str "127.0.0.1" -port: !!int 8000 -gui_title: !!str "LLama-3-8b-Uncensored" +engine: !!str HFEngine +engine_config: -default_max_new_tokens: !!int 1000 -default_temperature: !!float 0.6 -default_top_p: !!float 0.95 -default_repetition_penalty: !!float 1.2 -default_frequency_penalty: !!float 0.0 -default_presence_penalty: !!float 0.0 +defaults: + - gui_config: default + - uvicorn_config: default diff --git a/config/Llama-3-8B-Lexi-Uncensored.yaml b/config/Llama-3-8B-Lexi-Uncensored.yaml index 0df61b9..15e754b 100644 --- a/config/Llama-3-8B-Lexi-Uncensored.yaml +++ b/config/Llama-3-8B-Lexi-Uncensored.yaml @@ -1,19 +1,14 @@ %YAML 1.1 --- -engine: !!str LlamaCppEngine -model_name: !!str "Orenguteng/Llama-3-8B-Lexi-Uncensored-GGUF" +hf_model_name: !!str "Orenguteng/Llama-3-8B-Lexi-Uncensored-GGUF" tokenizer_name: !!str "meta-llama/Meta-Llama-3-8B-Instruct" -filename: !!str "*Q4_K_M.gguf" -n_gpu_layers: 0 -host: !!str "127.0.0.1" -port: !!int 8000 -gui_title: !!str "LLama-3-8b-Uncensored-4bit-pruned" +engine: !!str LlamaCppEngine +engine_config: + filename: !!str "*Q4_K_M.gguf" + n_gpu_layers: 0 -default_max_new_tokens: !!int 1000 -default_temperature: !!float 0.6 -default_top_p: !!float 0.95 -default_repetition_penalty: !!float 1.2 -default_frequency_penalty: !!float 0.0 -default_presence_penalty: !!float 0.0 +defaults: + - gui_config: default + - uvicorn_config: default diff --git a/config/Vikhr.yaml b/config/Vikhr.yaml index 4a08574..8a05491 100644 --- a/config/Vikhr.yaml +++ b/config/Vikhr.yaml @@ -1,18 +1,13 @@ %YAML 1.1 --- -engine: !!str VLLMEngine -model_name: !!str "Vikhrmodels/Vikhr-7B-instruct_0.2" +hf_model_name: !!str "Vikhrmodels/Vikhr-7B-instruct_0.2" tokenizer_name: "Vikhrmodels/Vikhr-7B-instruct_0.2" -host: !!str "127.0.0.1" -port: !!int 8000 +engine: !!str VLLMEngine +engine_config: -gui_title: !!str "Vikhr-7B-instruct_0.2" -default_max_new_tokens: !!int 1000 -default_temperature: !!float 0.6 -default_top_p: !!float 0.95 -default_repetition_penalty: !!float 1.2 -default_frequency_penalty: !!float 0.0 -default_presence_penalty: !!float 0.0 +defaults: + - gui_config: default + - uvicorn_config: default diff --git a/config/gui_config/default.yaml b/config/gui_config/default.yaml new file mode 100644 index 0000000..e22cbfe --- /dev/null +++ b/config/gui_config/default.yaml @@ -0,0 +1,7 @@ +default_sampling_config: + max_new_tokens: !!int 1000 + temperature: !!float 0.6 + top_p: !!float 0.95 + repetition_penalty: !!float 1.2 + frequency_penalty: !!float 0.0 + presence_penalty: !!float 0.0 diff --git a/config/meta-llama-Meta-Llama-3-8B-Instruct.yaml b/config/meta-llama-Meta-Llama-3-8B-Instruct.yaml index f6ca0d8..01c0146 100644 --- a/config/meta-llama-Meta-Llama-3-8B-Instruct.yaml +++ b/config/meta-llama-Meta-Llama-3-8B-Instruct.yaml @@ -1,16 +1,12 @@ %YAML 1.1 --- -engine: !!str VLLMEngine -model_name: !!str "meta-llama/Meta-Llama-3-8B-Instruct" +hf_model_name: !!str "meta-llama/Meta-Llama-3-8B-Instruct" +tokenizer_name: !!str "meta-llama/Meta-Llama-3-8B-Instruct" -host: !!str "127.0.0.1" -port: !!int 8000 -gui_title: !!str "meta-llama/Meta-Llama-3-8B-Instruct" +engine: !!str VLLMEngine +engine_config: -default_max_new_tokens: !!int 1000 -default_temperature: !!float 0.6 -default_top_p: !!float 0.95 -default_repetition_penalty: !!float 1.2 -default_frequency_penalty: !!float 0.0 -default_presence_penalty: !!float 0.0 +defaults: + - gui_config: default + - uvicorn_config: default diff --git a/config/uvicorn_config/default.yaml b/config/uvicorn_config/default.yaml new file mode 100644 index 0000000..e0769fe --- /dev/null +++ b/config/uvicorn_config/default.yaml @@ -0,0 +1,4 @@ +host: !!str "127.0.0.1" +port: !!int 8000 +log_level: !!str debug +loop: !!str asyncio \ No newline at end of file diff --git a/src/AGISwarm/llm_instruct_ms/__main__.py b/src/AGISwarm/llm_instruct_ms/__main__.py index c452d48..c8478aa 100644 --- a/src/AGISwarm/llm_instruct_ms/__main__.py +++ b/src/AGISwarm/llm_instruct_ms/__main__.py @@ -1,25 +1,30 @@ """Main module for the LLM instruct microservice""" -from argparse import ArgumentParser +import os +from pathlib import Path +import hydra import uvicorn from .app import LLMInstructApp -from .settings import LLMInstructSettings +from .typing import LLMInstructConfig -def main(): +@hydra.main( + config_path=str(Path(os.getcwd()) / "config"), +) +def main(config: LLMInstructConfig): """Main function""" - parser = ArgumentParser() - parser.add_argument("--config", type=str, help="Path to the configuration file") - args = parser.parse_args() - yaml_path = args.config - - settings = LLMInstructSettings.from_yaml(yaml_path) - llm_instruct_app = LLMInstructApp(settings) - uvicorn.run(llm_instruct_app.app, host=settings.host, port=settings.port) + llm_instruct_app = LLMInstructApp(config) + uvicorn.run( + llm_instruct_app.app, + host=config.uvicorn_config.host, + port=config.uvicorn_config.port, + log_level=config.uvicorn_config.log_level, + loop=config.uvicorn_config.loop, + ) if __name__ == "__main__": - main() + main() # pylint: disable=no-value-for-parameter diff --git a/src/AGISwarm/llm_instruct_ms/app/app.py b/src/AGISwarm/llm_instruct_ms/app.py similarity index 79% rename from src/AGISwarm/llm_instruct_ms/app/app.py rename to src/AGISwarm/llm_instruct_ms/app.py index 3c8e946..dac983d 100644 --- a/src/AGISwarm/llm_instruct_ms/app/app.py +++ b/src/AGISwarm/llm_instruct_ms/app.py @@ -2,28 +2,38 @@ import uuid from pathlib import Path -from typing import Any, Dict, List +from typing import Any, Dict, List, cast from fastapi import APIRouter, FastAPI, WebSocket, WebSocketDisconnect from fastapi.responses import FileResponse from fastapi.staticfiles import StaticFiles from jinja2 import Environment, FileSystemLoader -from pydantic_settings import BaseSettings +from omegaconf import OmegaConf +from pydantic import BaseModel -from ..llm_engines import EngineProtocol -from ..settings import ENGINE_MAP, ENGINE_SAMPLING_PARAMS_MAP, LLMInstructSettings +from .llm_engines import EngineProtocol +from .typing import ( + ENGINE_CONFIG_MAP, + ENGINE_MAP, + ENGINE_SAMPLING_PARAMS_MAP, + LLMInstructConfig, +) class LLMInstructApp: # pylint: disable=too-few-public-methods """Application factory""" - def __init__(self, settings: LLMInstructSettings): - self.settings = settings + def __init__(self, config: LLMInstructConfig): + self.config = config self.app = FastAPI() - self.llm: EngineProtocol[Any] = ENGINE_MAP[settings.engine]( - **settings.engine_settings.model_dump() + if config.engine_config is None: + config.engine_config = ENGINE_CONFIG_MAP[config.engine]() + self.llm: EngineProtocol[Any] = ENGINE_MAP[config.engine]( # type: ignore + hf_model_name=config.hf_model_name, + tokenizer_name=config.tokenizer_name, + **cast(dict, OmegaConf.to_container(config.engine_config)), ) - self.sampling_settings_cls = ENGINE_SAMPLING_PARAMS_MAP[settings.engine] + self.sampling_settings_cls = ENGINE_SAMPLING_PARAMS_MAP[config.engine] self._configure_routers() def _configure_routers(self): @@ -52,7 +62,9 @@ async def get_root(): # type: ignore ) as f: f.write( template.render( - **self.settings.model_dump(), + OmegaConf.to_container( + self.config.gui_config.default_sampling_config + ), ) ) return FileResponse(Path(__file__).parent / "gui" / "current_index.html") @@ -94,6 +106,9 @@ async def generate(websocket: WebSocket): # type: ignore elif response["response"] == "success": reply += response["msg"] await websocket.send_json(response) + elif response["response"] == "abort": + await websocket.send_json(response) + break else: raise ValueError( f"Invalid response: {response['response']}" @@ -111,7 +126,7 @@ async def generate(websocket: WebSocket): # type: ignore finally: await websocket.close() - class AbortRequest(BaseSettings): + class AbortRequest(BaseModel): """Abort request""" request_id: str diff --git a/src/AGISwarm/llm_instruct_ms/app/__init__.py b/src/AGISwarm/llm_instruct_ms/app/__init__.py deleted file mode 100644 index f37c863..0000000 --- a/src/AGISwarm/llm_instruct_ms/app/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""LLM Instruct App Module""" - -from .app import LLMInstructApp diff --git a/src/AGISwarm/llm_instruct_ms/app/gui/current_index.html b/src/AGISwarm/llm_instruct_ms/gui/current_index.html similarity index 97% rename from src/AGISwarm/llm_instruct_ms/app/gui/current_index.html rename to src/AGISwarm/llm_instruct_ms/gui/current_index.html index 7dde231..7098803 100644 --- a/src/AGISwarm/llm_instruct_ms/app/gui/current_index.html +++ b/src/AGISwarm/llm_instruct_ms/gui/current_index.html @@ -1,7 +1,7 @@
-