diff --git a/.gitignore b/.gitignore
index 6b69f654..ba774f9d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -222,6 +222,9 @@ output
*.local.toml
localdata/
+docker/app_data
model_repository/
*.tmp
__*
+
+arms.trace*
diff --git a/Dockerfile b/Dockerfile
index b1ff810d..f86f4937 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -24,7 +24,7 @@ ENV VIRTUAL_ENV=/app/.venv \
ENABLE_AIOHTTPCLIENT=false \
ENABLE_HTTPX=false
-RUN apt-get update && apt-get install -y libgl1 libglib2.0-0
+RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 curl
WORKDIR /app
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
COPY . .
diff --git a/Dockerfile_gpu b/Dockerfile_gpu
index 22973518..455b86d4 100644
--- a/Dockerfile_gpu
+++ b/Dockerfile_gpu
@@ -26,7 +26,7 @@ ENV VIRTUAL_ENV=/app/.venv \
ENABLE_AIOHTTPCLIENT=false \
ENABLE_HTTPX=false
-RUN apt-get update && apt-get install -y libgl1 libglib2.0-0
+RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 curl
WORKDIR /app
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
diff --git a/Dockerfile_nginx b/Dockerfile_nginx
index a9cdd81a..e78be505 100644
--- a/Dockerfile_nginx
+++ b/Dockerfile_nginx
@@ -1,3 +1,3 @@
FROM nginx:latest
-COPY ./nginx/default.conf etc/nginx/conf.d/default.conf
-COPY ./nginx/nginx.conf etc/nginx/nginx.conf
+COPY ./docker/nginx/default.conf etc/nginx/conf.d/default.conf
+COPY ./docker/nginx/nginx.conf etc/nginx/nginx.conf
diff --git a/Dockerfile_ui b/Dockerfile_ui
index 992bf9a6..3be3daae 100644
--- a/Dockerfile_ui
+++ b/Dockerfile_ui
@@ -20,8 +20,7 @@ RUN rm -rf /etc/localtime && ln -s /usr/share/zoneinfo/Asia/Harbin /etc/localti
ENV VIRTUAL_ENV=/app/.venv \
PATH="/app/.venv/bin:$PATH"
-RUN apt-get update && apt-get install -y libgl1 libglib2.0-0
-
+RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 curl
WORKDIR /app
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
COPY . .
diff --git a/README.md b/README.md
index 02fa9cda..fd034a83 100644
--- a/README.md
+++ b/README.md
@@ -12,12 +12,16 @@
📕 Contents
-- 💡 [What is PAI-RAG?](#what-is-pai-rag)
-- 🌟 [Key Features](#key-features)
-- 🔎 [Get Started](#get-started)
- - [Local](#run-in-local-environment)
+- 💡 [What is PAI-RAG?](#-what-is-pai-rag)
+- 🌟 [Key Features](#-key-features)
+- 🔎 [Get Started](#-get-started)
- [Docker](#run-in-docker)
-- 🔧 [API Service](#api-service)
+ - [Local](#run-in-local-environment)
+- 📜 [Documents](#-documents)
+ - [API specification](#api-specification)
+ - [Agentic RAG](#agentic-rag)
+ - [Data Analysis](#data-analysis)
+ - [Supported File Types](#supported-file-types)
@@ -27,9 +31,8 @@ PAI-RAG is an easy-to-use opensource framework for modular RAG (Retrieval-Augmen
# 🌟 Key Features
-![framework](docs/figures/framework.jpg)
-
- Modular design, flexible and configurable
+- Powerful RAG capability: multi-modal rag, agentic-rag and nl2sql support
- Built on community open source components, low customization threshold
- Multi-dimensional automatic evaluation system, easy to grasp the performance quality of each module
- Integrated llm-based-application tracing and evaluation visualization tools
@@ -38,304 +41,40 @@ PAI-RAG is an easy-to-use opensource framework for modular RAG (Retrieval-Augmen
# 🔎 Get Started
-## Run in Local Environment
-
-1. Clone Repo
-
- ```bash
- git clone git@github.com:aigc-apps/PAI-RAG.git
- ```
-
-2. Development Environment Settings
-
- This project uses poetry for management. To ensure environmental consistency and avoid problems caused by Python version differences, we specify Python version 3.11.
-
- ```bash
- conda create -n rag_env python==3.11
- conda activate rag_env
- ```
-
- if you use macOS and need to process PPTX files, you need use the following command to install the dependencies to process PPTX files:
-
- ```bash
- brew install mono-libgdiplus
- ```
-
-### (1) CPU
-
-Use poetry to install project dependency packages directly:
-
-```bash
-pip install poetry
-poetry install
-poetry run aliyun-bootstrap -a install
-```
-
-### (2) GPU
-
-First replace the default pyproject.toml with the GPU version, and then use poetry to install the project dependency package:
-
-```bash
-mv pyproject_gpu.toml pyproject.toml && rm poetry.lock
-pip install poetry
-poetry install
-poetry run aliyun-bootstrap -a install
-```
-
-- Common network timeout issues
-
- Note: During the installation, if you encounter a network connection timeout, you can add the Alibaba Cloud or Tsinghua mirror source and append the following lines to the end of the pyproject.toml file:
-
- ```bash
- [[tool.poetry.source]]
- name = "mirrors"
- url = "http://mirrors.aliyun.com/pypi/simple/" # Aliyun
- # url = "https://pypi.tuna.tsinghua.edu.cn/simple/" # Qsinghua
- priority = "default"
- ```
-
- After that, execute the following commands:
-
- ```bash
- poetry lock
- poetry install
- ```
-
-3. Load Data
-
- Insert new files in the data_path into the current index storage:
-
- ```bash
- load_data -c src/pai_rag/config/settings.yaml -d data_path -p pattern
- ```
-
- path examples:
-
- ```
- a. load_data -d test/example
- b. load_data -d test/example_data/pai_document.pdf
- c. load_data -d test/example_data -p *.pdf
-
- ```
-
-4. Run RAG Service
-
- To use the OpenAI or DashScope API, you need to introduce environment variables:
-
- ```bash
- export OPENAI_API_KEY=""
- export DASHSCOPE_API_KEY=""
- ```
-
- To utilize Object Storage Service (OSS) for file storage, particularly when operating in multimodal mode, you must first configure settings in both the src/pai_rag/config/settings.toml and src/pai_rag/config/settings_multi_modal.toml configuration files. Append the following TOML configuration snippet within these files:
-
- ```toml
- [rag.oss_store]
- bucket = ""
- endpoint = ""
- prefix = ""
- ```
-
- Additionally, you need to introduce environment variables:
-
- ```bash
- export OSS_ACCESS_KEY_ID=""
- export OSS_ACCESS_KEY_SECRET=""
- ```
-
- ```bash
- # Support custom host (default 0.0.0.0), port (default 8001), config (default src/pai_rag/config/settings.yaml), enable-example (default True), skip-download-models (default False)
- # Download [bge-large-zh-v1.5, easyocr] by default, you can skip it by setting --skip-download-models.
- # you can use tool "load_model" to download other models including [bge-large-zh-v1.5, easyocr, SGPT-125M-weightedmean-nli-bitfit, bge-large-zh-v1.5, bge-m3, bge-reranker-base, bge-reranker-large, paraphrase-multilingual-MiniLM-L12-v2, qwen_1.8b, text2vec-large-chinese]
- pai_rag serve [--host HOST] [--port PORT] [--config CONFIG_FILE] [--enable-example False] [--skip-download-models]
- ```
-
- The default configuration file is src/pai_rag/config/settings.yaml. However, if you require the multimodal llm module, you should switch to the src/pai_rag/config/settings_multi_modal.yaml file instead.
-
- ```bash
- pai_rag serve -c src/pai_rag/config/settings_multi_modal.yaml
- ```
-
-5. Download provided models to local directory
-
- ```bash
- # Support model name (default ""), download all models mentioned before without parameter model_name.
- load_model [--model-name MODEL_NAME]
- ```
-
-6. Run RAG WebUI
-
- ```bash
- # Supports custom host (default 0.0.0.0), port (default 8002), config (default localhost:8001)
- pai_rag ui [--host HOST] [--port PORT] [rag-url RAG_URL]
- ```
-
- You can also open http://127.0.0.1:8002/ to configure the RAG service and upload local data.
-
## Run in Docker
-To make it easier to use and save time on environment installation, we also provide a method to start directly based on the image.
-
-### Use public images directly
-
-1. RAG Service
-
-- CPU
-
- ```bash
- docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0
-
- # -p (port) -v (mount embedding and rerank model directories) -e (set environment variables, if using Dashscope LLM/Embedding, need to be introduced) -w (number of workers, can be specified as the approximate number of CPU cores)
- docker run --name pai_rag \
- -p 8001:8001 \
- -v /huggingface:/huggingface \
- -v /your_local_documents_path:/data \
- -e DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY} \
- -d \
- mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0 gunicorn -b 0.0.0.0:8001 -w 16 -k uvicorn.workers.UvicornH11Worker pai_rag.main:app
- ```
-
-- GPU
-
- ```bash
- docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-gpu
-
- # -p (port) -v (mount embedding and rerank model directories) -e (set environment variables, if using Dashscope LLM/Embedding, you need to introduce it) -w (number of workers, which can be specified as the approximate number of CPU cores)
- docker run --name pai_rag \
- -p 8001:8001 \
- -v /huggingface:/huggingface \
- -v /your_local_documents_path:/data \
- --gpus all \
- -e DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY} \
- -d \
- mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-gpu gunicorn -b 0.0.0.0:8001 -w 16 -k uvicorn.workers.UvicornH11Worker pai_rag.main:app
- ```
-
-2. Load Data
-
- Insert new files in the /data into the current index storage:
-
+1. Setup environmental variables.
```bash
- sudo docker exec -ti pai_rag bash
- load_data -c src/pai_rag/config/settings.yaml -d /data -p pattern
+ cd docker
+ cp .env.example .env
```
-
- path examples:
-
- ```
- a. load_data -d /data/test/example
- b. load_data -d /data/test/example_data/pai_document.pdf
- c. load_data -d /data/test/example_data -p *.pdf
+ Edit the `.env` file if you are using the DashScope API or an OSS store.
+2. Start with docker compose command:
+ ```bash
+ docker compose up -d
```
+3. Now you can open http://localhost:8000 to check whether it works. The service will need to download the model weights, which may take a while (usually 20 minutes).
-3. RAG UI
- Linux:
-
-```bash
-docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-ui
-
-docker run --network host -d mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-ui
-```
-
-Mac/Windows:
-
-```bash
-docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-ui
-
-docker run -p 8002:8002 -d mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0_ui pai_rag ui -p 8002 -c http://host.docker.internal:8001/
-```
-
-You can also open http://127.0.0.1:8002/ to configure the RAG service and upload local data.
-
-### Build your own image based on Dockerfile
-
-You can refer to [How to Build Docker](docs/docker_build.md) to build the image yourself.
-
-After the image is built, you can refer to the above steps to start the Rag service and WebUI.
-
-# 🔧 API Service
-
-You can use the command line to send API requests to the server, for example, calling the [Upload API](#upload-api) to upload a knowledge base file.
-
-## Upload API
-
-It supports uploading local files through API and supports specifying different failure_paths. Each time an API request is sent, a task_id will be returned. The file upload status (processing, completed, failed) can then be checked through the task_id.
-
-- upload_data
-
-```bash
-curl -X 'POST' http://127.0.0.1:8000/service/upload_data -H 'Content-Type: multipart/form-data' -F 'files=@local_path/PAI.txt' -F 'faiss_path=localdata/storage'
-
-# Return: {"task_id": "2c1e557733764fdb9fefa063538914da"}
-```
-
-- get_upload_state
-
-```bash
-curl http://127.0.0.1:8001/service/get_upload_state\?task_id\=2c1e557733764fdb9fefa063538914da
-
-# Return: {"task_id":"2c1e557733764fdb9fefa063538914da","status":"completed"}
-```
-
-## Query API
-
-- Supports three dialogue modes:
- - /query/retrieval
- - /query/llm
- - /query: (default) RAG (retrieval + llm)
-
-```bash
-curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"PAI是什么?"}'
-```
-
-- Multi-round dialogue
-
-```bash
-curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"What is PAI?"}'
-```
+## Run in Local Environment
-> Parameters: session_id
->
-> The unique identifier of the conversation history session. After the session_id is passed in, the conversation history will be recorded. Calling the large model will automatically carry the stored conversation history.
->
-> ```bash
-> curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"What are its advantages?", "session_id": "1702ffxxad3xxx6fxxx97daf7c"}'
-> ```
+If you want to start running/developing pai_rag locally, please refer to [local development](./docs/develop/local_develop.md)
-> Parameters: chat_history
->
-> The conversation history between the user and the model. Each element in the list is a round of conversation in the form of {"user":"user input","bot":"model output"}. Multiple rounds of conversations are arranged in chronological order.
->
-> ```bash
-> curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"What are its features?", "chat_history": [{"user":"What is PAI?", "bot":"PAI is Alibaba Cloud's artificial intelligence platform, which provides a one-stop machine learning solution. This platform supports various machine learning tasks, including supervised learning, unsupervised learning, and reinforcement learning, and is suitable for multiple scenarios such as marketing, finance, and social networks."}]}'
-> ```
+# 📜 Documents
-> Parameters: session_id + chat_history
->
-> Chat_history will be used to append and update the conversation history corresponding to the stored session_id
->
-> ```bash
-> curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"What are its advantages?", "chat_history": [{"user":"PAI是什么?", "bot":"PAI is Alibaba Cloud's artificial intelligence platform, which provides a one-stop machine learning solution. This platform supports various machine learning tasks, including supervised learning, unsupervised learning, and reinforcement learning, and is suitable for multiple scenarios such as marketing, finance, and social networks."}], "session_id": "1702ffxxad3xxx6fxxx97daf7c"}'
-> ```
+## API specification
-- Agent And Function Tool
+You can access and integrate our RAG service according to our [API specification](./docs/api.md).
-# Agentic RAG
+## Agentic RAG
You can use agent with function calling api-tools in PAI-RAG, please refer to the documentation:
[Agentic RAG](./docs/agentic_rag.md)
-# Data Analysis
+## Data Analysis
You can use data analysis based on database or sheet file in PAI-RAG, please refer to the documentation: [Data Analysis](./docs/data_analysis_doc.md)
-# Parameter Configuration
-
-For more customization options, please refer to the documentation:
-
-[Parameter Configuration Instruction](./docs/config_guide_en.md)
-
-# Supported File Types
+## Supported File Types
| 文件类型 | 文件格式 |
| ------------ | -------------------------------------- |
diff --git a/README_zh.md b/README_zh.md
index aa2b2856..a6acec33 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -7,12 +7,16 @@
📕 目录
-- 💡 [什么是PAI-RAG?](#什么是pai-rag)
-- 🌟 [主要模块和功能](#主要模块和功能)
-- 🔎 [快速开始](#快速开始)
- - [本地环境](#方式一本地环境)
- - [Docker镜像](#方式二docker镜像)
-- 🔧 [API服务](#api服务)
+- 💡 [什么是PAI-RAG?](#-什么是pai-rag)
+- 🌟 [主要模块和功能](#-主要模块和功能)
+- 🔎 [快速开始](#-快速开始)
+  - [Docker镜像](#docker镜像启动)
+ - [本地环境](#本地启动)
+- 📜 [文档](#-文档)
+ - [API服务](#api服务)
+ - [Agentic RAG](#agentic-rag)
+  - [数据分析 NL2sql](#数据分析-nl2sql)
+ - [支持文件类型](#支持文件类型)
@@ -22,9 +26,8 @@ PAI-RAG 是一个易于使用的模块化 RAG(检索增强生成)开源框
# 🌟 主要模块和功能
-![framework](docs/figures/framework.jpg)
-
- 模块化设计,灵活可配置
+- 功能丰富,包括Agentic RAG, 多模态问答和nl2sql等
- 基于社区开源组件构建,定制化门槛低
- 多维度自动评估体系,轻松掌握各模块性能质量
- 集成全链路可观测和评估可视化工具
@@ -33,262 +36,47 @@ PAI-RAG 是一个易于使用的模块化 RAG(检索增强生成)开源框
# 🔎 快速开始
-## 方式一:本地环境
-
-1. 克隆仓库
-
- ```bash
- git clone git@github.com:aigc-apps/PAI-RAG.git
- ```
-
-2. 配置开发环境
-
- 本项目使用poetry进行管理,若在本地环境下使用,建议在安装环境之前先创建一个空环境。为了确保环境一致性并避免因Python版本差异造成的问题,我们指定Python版本为3.11。
-
- ```bash
- conda create -n rag_env python==3.11
- conda activate rag_env
- ```
-
- 如果使用macOS且需要处理PPTX文件,需要下载依赖库处理PPTX文件
-
- ```bash
- brew install mono-libgdiplus
- ```
-
- ### (1) CPU环境
-
- 直接使用poetry安装项目依赖包:
-
- ```bash
- pip install poetry
- poetry install
- poetry run aliyun-bootstrap -a install
- ```
-
- ### (2) GPU环境
-
- 首先替换默认 pyproject.toml 为 GPU 版本, 再使用poetry安装项目依赖包:
-
- ```bash
- mv pyproject_gpu.toml pyproject.toml && rm poetry.lock
- pip install poetry
- poetry install
- poetry run aliyun-bootstrap -a install
-
- ```
-
-- 常见网络超时问题
-
- 注:在安装过程中,若遇到网络连接超时的情况,可以添加阿里云或清华的镜像源,在 pyproject.toml 文件末尾追加以下几行:
-
- ```bash
- [[tool.poetry.source]]
- name = "mirrors"
- url = "http://mirrors.aliyun.com/pypi/simple/" # 阿里云
- # url = "https://pypi.tuna.tsinghua.edu.cn/simple/" # 清华
- priority = "default"
- ```
-
- 之后,再依次执行以下命令:
-
- ```bash
- poetry lock
- poetry install
- ```
-
-3. 加载数据
-
- 向当前索引存储中插入data_path路径下的新文件
-
- ```bash
- load_data -c src/pai_rag/config/settings.yaml -d data_path -p pattern
- ```
-
- path examples:
-
- ```
- a. load_data -d test/example
- b. load_data -d test/example_data/pai_document.pdf
- c. load_data -d test/example_data -p *.pdf
-
- ```
-
-4. 启动RAG服务
-
- 使用OpenAI API,需要在命令行引入环境变量
-
- ```bash
- export OPENAI_API_KEY=""
- ```
-
- 使用DashScope API,需要在命令行引入环境变量
-
- ```bash
- export DASHSCOPE_API_KEY=""
- ```
-
- 使用OSS存储文件(使用多模态模式时必须提前配置),在配置文件src/pai_rag/config/settings.toml和src/pai_rag/config/settings_multi_modal.toml中添加以下配置:
+## Docker镜像启动
- ```toml
- [rag.oss_store]
- bucket = ""
- endpoint = ""
- prefix = ""
- ```
-
- 并需要在命令行引入环境变量
-
- ```bash
- export OSS_ACCESS_KEY_ID=""
- export OSS_ACCESS_KEY_SECRET=""
- ```
-
- 启动RAG服务
-
- ```bash
- # 启动,支持自定义host(默认0.0.0.0), port(默认8001), config(默认src/pai_rag/config/settings.yaml), enable-example(默认True), skip-download-models(不加为False)
- # 默认启动时下载模型 [bge-large-zh-v1.5, easyocr] , 可设置 skip-download-models 避免启动时下载模型.
- # 可使用命令行 "load_model" 下载模型 including [bge-large-zh-v1.5, easyocr, SGPT-125M-weightedmean-nli-bitfit, bge-large-zh-v1.5, bge-m3, bge-reranker-base, bge-reranker-large, paraphrase-multilingual-MiniLM-L12-v2, qwen_1.8b, text2vec-large-chinese]
- pai_rag serve [--host HOST] [--port PORT] [--config CONFIG_FILE] [--enable-example False] [--skip-download-models]
- ```
+为了更方便使用,节省较长时间的环境安装问题,我们也提供了直接基于镜像启动的方式。
- 启动默认配置文件为src/pai_rag/config/settings.yaml,若需要使用多模态,请切换到src/pai_rag/config/settings_multi_modal.yaml
+1. 配置环境变量
```bash
- pai_rag serve -c src/pai_rag/config/settings_multi_modal.yaml
+ cd docker
+ cp .env.example .env
```
-5. 下载其他模型到本地
-
- ```bash
- # 支持 model name (默认 ""), 没有参数时, 默认下载上述所有模型。
- load_model [--model-name MODEL_NAME]
- ```
+ 如果你需要使用dashscope api或者OSS存储,可以根据需要修改.env中的环境变量。
-6. 启动RAG WebUI
+2. 启动
```bash
- # 启动,支持自定义host(默认0.0.0.0), port(默认8002), config(默认localhost:8001)
- pai_rag ui [--host HOST] [--port PORT] [rag-url RAG_URL]
+ docker compose up -d
```
- 你也可以打开http://127.0.0.1:8002/ 来配置RAG服务以及上传本地数据。
-
-## 方式二:Docker镜像
-
-为了更方便使用,节省较长时间的环境安装问题,我们也提供了直接基于镜像启动的方式。
-
-### 使用公开镜像
-
-1. 启动RAG服务
-
-- CPU
-
- ```bash
- docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0
-
- # 启动: -p(端口) -v(挂载embedding和rerank模型目录) -e(设置环境变量,若使用Dashscope LLM/Embedding,需要引入) -w(worker数量,可以指定为近似cpu核数)
- docker run -p 8001:8001 -v /huggingface:/huggingface -e DASHSCOPE_API_KEY=sk-xxxx -d mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0 gunicorn -b 0.0.0.0:8001 -w 16 -k uvicorn.workers.UvicornH11Worker pai_rag.main:app
- ```
-
-- GPU
-
- ```bash
- docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-gpu
-
- # 启动: -p(端口) -v(挂载embedding和rerank模型目录) -e(设置环境变量,若使用Dashscope LLM/Embedding,需要引入) -w(worker数量,可以指定为近似cpu核数)
- docker run -p 8001:8001 -v /huggingface:/huggingface --gpus all -e DASHSCOPE_API_KEY=sk-xxxx -d mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-gpu gunicorn -b 0.0.0.0:8001 -w 16 -k uvicorn.workers.UvicornH11Worker pai_rag.main:app
- ```
-
-2. 启动RAG WebUI
- Linux:
+3. 打开浏览器中的 http://localhost:8000 访问 Web UI。第一次启动服务会下载需要的相关模型文件,需要等待20分钟左右。
-```bash
-docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-ui
+## 本地启动
-docker run --network host -d mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-ui
-```
+如果想在本地启动或者进行代码开发,可以参考文档:[本地运行](./docs/develop/local_develop_zh.md)
-Mac/Windows:
+# 📜 文档
-```bash
-docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-ui
+## API服务
-docker run -p 8002:8002 -d mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-ui pai_rag ui -p 8002 -c http://host.docker.internal:8001/
-```
+可以直接通过API服务调用RAG能力(上传数据,RAG查询,检索,NL2SQL, Function call等等)。更多细节可以查看[API文档](./docs/api_zh.md)
-### 基于Dockerfile自行构建镜像
-
-可以参考[How to Build Docker](docs/docker_build.md)来自行构建镜像。
-
-镜像构建完成后可参考【使用公开镜像】的步骤启动RAG服务和WebUI。
-
-# 🔧 API服务
-
-你可以使用命令行向服务侧发送API请求。比如调用[Upload API](#upload-api)上传知识库文件。
-
-## Upload API
-
-支持通过API的方式上传本地文件,并支持指定不同的faiss_path,每次发送API请求会返回一个task_id,之后可以通过task_id来查看文件上传状态(processing、completed、failed)。
-
-- 上传(upload_data)
-
-```bash
-curl -X 'POST' http://127.0.0.1:8000/service/upload_data -H 'Content-Type: multipart/form-data' -F 'files=@local_path/PAI.txt' -F 'faiss_path=localdata/storage'
-
-# Return: {"task_id": "2c1e557733764fdb9fefa063538914da"}
-```
-
-- 查看上传状态(get_upload_state)
-
-```bash
-curl http://127.0.0.1:8077/service/get_upload_state\?task_id\=2c1e557733764fdb9fefa063538914da
-
-# Return: {"task_id":"2c1e557733764fdb9fefa063538914da","status":"completed"}
-```
-
-## Query API
-
-- Rag Query请求
-
-```bash
-curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"PAI是什么?"}'
-```
-
-- 多轮对话请求
-
-```bash
-curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"PAI是什么?"}'
-
-# 传入session_id:对话历史会话唯一标识,传入session_id后,将对话历史进行记录,调用大模型将自动携带存储的对话历史。
-curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"它有什么优势?", "session_id": "1702ffxxad3xxx6fxxx97daf7c"}'
-
-# 传入chat_history:用户与模型的对话历史,list中的每个元素是形式为{"user":"用户输入","bot":"模型输出"}的一轮对话,多轮对话按时间顺序排列。
-curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"它有哪些功能?", "chat_history": [{"user":"PAI是什么?", "bot":"PAI是阿里云的人工智能平台,它提供一站式的机器学习解决方案。这个平台支持各种机器学习任务,包括有监督学习、无监督学习和增强学习,适用于营销、金融、社交网络等多个场景。"}]}'
-
-# 同时传入session_id和chat_history:会用chat_history对存储的session_id所对应的对话历史进行追加更新
-curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"它有什么优势?", "chat_history": [{"user":"PAI是什么?", "bot":"PAI是阿里云的人工智能平台,它提供一站式的机器学习解决方案。这个平台支持各种机器学习任务,包括有监督学习、无监督学习和增强学习,适用于营销、金融、社交网络等多个场景。"}], "session_id": "1702ffxxad3xxx6fxxx97daf7c"}'
-```
-
-- Agent及调用Function Tool的简单对话
-
-# Agentic RAG
+## Agentic RAG
您也可以在PAI-RAG中使用支持API function calling功能的Agent,请参考文档:
[Agentic RAG](./docs/agentic_rag.md)
-# Data Analysis
-
-您可以在PAI-RAG中使用支持数据库和表格文件的数据分析功能,请参考文档:[Data Analysis](./docs/data_analysis_doc.md)
-
-# 参数配置
-
-如需实现更多个性化配置,请参考文档:
+## 数据分析 NL2sql
-[参数配置说明](./docs/config_guide_cn.md)
+您可以在PAI-RAG中使用支持数据库和表格文件的数据分析功能,请参考文档:[数据分析 Nl2sql](./docs/data_analysis_doc.md)
-# 支持文件类型
+## 支持文件类型
| 文件类型 | 文件格式 |
| -------- | -------------------------------------- |
diff --git a/docker/.env.example b/docker/.env.example
new file mode 100644
index 00000000..76579211
--- /dev/null
+++ b/docker/.env.example
@@ -0,0 +1,6 @@
+# DASHSCOPE API_KEY
+DASHSCOPE_API_KEY=
+
+# OSS AK SK
+OSS_ACCESS_KEY_ID=
+OSS_ACCESS_KEY_SECRET=
diff --git a/docker/compose.yaml b/docker/compose.yaml
new file mode 100644
index 00000000..52c320c1
--- /dev/null
+++ b/docker/compose.yaml
@@ -0,0 +1,38 @@
+services:
+ api:
+ image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0
+ ports:
+ - "8001:8001"
+ restart: always
+ environment:
+ DASHSCOPE_API_KEY: ${DASHSCOPE_API_KEY}
+ OSS_ACCESS_KEY_ID: ${OSS_ACCESS_KEY_ID}
+ OSS_ACCESS_KEY_SECRET: ${OSS_ACCESS_KEY_SECRET}
+
+ volumes:
+ - ../model_repository:/app/model_repository
+ - ./app_data:/app/localdata
+ entrypoint: ["pai_rag", "serve"]
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
+ interval: 30s
+ retries: 40
+ start_period: 20s
+
+ web:
+ image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-ui
+ ports:
+ - "8002:8002"
+ restart: always
+ depends_on:
+ - api
+ entrypoint: ["pai_rag", "ui", "-c", "http://api:8001"]
+
+ nginx:
+ image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/mybigpai/pairag:0.1.0-nginx
+ volumes:
+ - ./nginx/default.conf.compose:/etc/nginx/conf.d/default.conf
+ ports:
+ - "8000:8000"
+ entrypoint: ["/docker-entrypoint.sh", "nginx"]
+ restart: always
diff --git a/nginx/default.conf b/docker/nginx/default.conf
similarity index 92%
rename from nginx/default.conf
rename to docker/nginx/default.conf
index 83ec8042..8b9a05dd 100644
--- a/nginx/default.conf
+++ b/docker/nginx/default.conf
@@ -33,6 +33,12 @@ server {
proxy_pass http://127.0.0.1:8001;
}
+ location /health {
+ proxy_set_header Host \$host;
+ proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for;
+ proxy_pass http://127.0.0.1:8001;
+ }
+
location /docs {
proxy_set_header Host \$host;
proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for;
diff --git a/docker/nginx/default.conf.compose b/docker/nginx/default.conf.compose
new file mode 100644
index 00000000..5a733ccc
--- /dev/null
+++ b/docker/nginx/default.conf.compose
@@ -0,0 +1,86 @@
+
+server {
+ listen 8000;
+ listen [::]:8000;
+ server_name localhost;
+ client_max_body_size 50m;
+
+ #access_log /var/log/nginx/host.access.log main;
+
+ location / {
+ proxy_set_header Host $host;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_pass http://web:8002;
+ }
+
+ #Websocket configuration
+ location /queue/ {
+ proxy_pass http://web:8002/queue/;
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection "upgrade";
+ }
+
+ location /service {
+ proxy_set_header Host $host;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_pass http://api:8001;
+ }
+
+ location /v1 {
+ proxy_set_header Host $host;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_pass http://api:8001;
+ }
+
+ location /health {
+ proxy_set_header Host $host;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_pass http://api:8001;
+ }
+
+
+ location /docs {
+ proxy_set_header Host $host;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_pass http://api:8001;
+ }
+
+ location /openapi {
+ proxy_set_header Host $host;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_pass http://api:8001;
+ }
+
+ #error_page 404 /404.html;
+
+ # redirect server error pages to the static page /50x.html
+ #
+ error_page 500 502 503 504 /50x.html;
+ location = /50x.html {
+ root /usr/share/nginx/html;
+ }
+
+ # proxy the PHP scripts to Apache listening on 127.0.0.1:80
+ #
+ #location ~ \.php$ {
+ # proxy_pass http://127.0.0.1;
+ #}
+
+ # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
+ #
+ #location ~ \.php$ {
+ # root html;
+ # fastcgi_pass 127.0.0.1:9000;
+ # fastcgi_index index.php;
+ # fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name;
+ # include fastcgi_params;
+ #}
+
+ # deny access to .htaccess files, if Apache's document root
+ # concurs with nginx's one
+ #
+ #location ~ /\.ht {
+ # deny all;
+ #}
+}
diff --git a/nginx/nginx.conf b/docker/nginx/nginx.conf
similarity index 100%
rename from nginx/nginx.conf
rename to docker/nginx/nginx.conf
diff --git a/docs/api.md b/docs/api.md
new file mode 100644
index 00000000..380dca99
--- /dev/null
+++ b/docs/api.md
@@ -0,0 +1,74 @@
+# 🔧 API Service
+
+You can use the command line to send API requests to the server, for example, calling the [Upload Data API](#upload-data-api) to upload a knowledge base file.
+
+## Upload Data API
+
+It supports uploading local files through API and supports specifying different failure_paths. Each time an API request is sent, a task_id will be returned. The file upload status (processing, completed, failed) can then be checked through the task_id.
+
+- upload_data
+
+```bash
+curl -X 'POST' http://127.0.0.1:8000/service/upload_data -H 'Content-Type: multipart/form-data' -F 'files=@local_path/PAI.txt'
+
+# Return: {"task_id": "2c1e557733764fdb9fefa063538914da"}
+```
+
+- get_upload_state
+
+```bash
+curl http://127.0.0.1:8000/service/get_upload_state\?task_id\=2c1e557733764fdb9fefa063538914da
+
+# Return: {"task_id":"2c1e557733764fdb9fefa063538914da","status":"completed"}
+```
+
+## Query API
+
+- Supports three dialogue modes:
+ - /query/retrieval
+ - /query/llm
+ - /query: (default) RAG (retrieval + llm)
+
+```bash
+curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"PAI是什么?"}'
+```
+
+```bash
+# streaming output
+curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"PAI是什么?", "stream":true}'
+```
+
+```bash
+# with intent
+curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"现在几点了", "with_intent":true}'
+```
+
+- Multi-round dialogue
+
+```bash
+curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"What is PAI?"}'
+```
+
+> Parameters: session_id
+>
+> The unique identifier of the conversation history session. After the session_id is passed in, the conversation history will be recorded. Calling the large model will automatically carry the stored conversation history.
+>
+> ```bash
+> curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"What are its advantages?", "session_id": "1702ffxxad3xxx6fxxx97daf7c"}'
+> ```
+
+> Parameters: chat_history
+>
+> The conversation history between the user and the model. Each element in the list is a round of conversation in the form of {"user":"user input","bot":"model output"}. Multiple rounds of conversations are arranged in chronological order.
+>
+> ```bash
+> curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"What are its features?", "chat_history": [{"user":"What is PAI?", "bot":"PAI is Alibaba Cloud'\''s artificial intelligence platform, which provides a one-stop machine learning solution. This platform supports various machine learning tasks, including supervised learning, unsupervised learning, and reinforcement learning, and is suitable for multiple scenarios such as marketing, finance, and social networks."}]}'
+> ```
+
+> Parameters: session_id + chat_history
+>
+> Chat_history will be used to append and update the conversation history corresponding to the stored session_id
+>
+> ```bash
+> curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"What are its advantages?", "chat_history": [{"user":"PAI是什么?", "bot":"PAI is Alibaba Cloud'\''s artificial intelligence platform, which provides a one-stop machine learning solution. This platform supports various machine learning tasks, including supervised learning, unsupervised learning, and reinforcement learning, and is suitable for multiple scenarios such as marketing, finance, and social networks."}], "session_id": "1702ffxxad3xxx6fxxx97daf7c"}'
+> ```
diff --git a/docs/api_zh.md b/docs/api_zh.md
new file mode 100644
index 00000000..85f94697
--- /dev/null
+++ b/docs/api_zh.md
@@ -0,0 +1,56 @@
+你可以使用命令行向服务侧发送API请求。比如调用[Upload API](#upload-api)上传知识库文件。
+
+## Upload API
+
+支持通过API的方式上传本地文件,并支持指定不同的faiss_path,每次发送API请求会返回一个task_id,之后可以通过task_id来查看文件上传状态(processing、completed、failed)。
+
+- 上传(upload_data)
+
+```bash
+curl -X 'POST' http://127.0.0.1:8000/service/upload_data -H 'Content-Type: multipart/form-data' -F 'files=@local_path/PAI.txt' -F 'faiss_path=localdata/storage'
+
+# Return: {"task_id": "2c1e557733764fdb9fefa063538914da"}
+```
+
+- 查看上传状态(get_upload_state)
+
+```bash
+curl http://127.0.0.1:8000/service/get_upload_state\?task_id\=2c1e557733764fdb9fefa063538914da
+
+# Return: {"task_id":"2c1e557733764fdb9fefa063538914da","status":"completed"}
+```
+
+## Query API
+
+- Rag Query请求
+
+```bash
+curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"PAI是什么?"}'
+```
+
+```bash
+# 流式输出
+curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"PAI是什么?", "stream": true}'
+```
+
+```bash
+# 意图识别
+curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"现在几点了", "with_intent": true}'
+```
+
+- 多轮对话请求
+
+```bash
+curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"PAI是什么?"}'
+
+# 传入session_id:对话历史会话唯一标识,传入session_id后,将对话历史进行记录,调用大模型将自动携带存储的对话历史。
+curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"它有什么优势?", "session_id": "1702ffxxad3xxx6fxxx97daf7c"}'
+
+# 传入chat_history:用户与模型的对话历史,list中的每个元素是形式为{"user":"用户输入","bot":"模型输出"}的一轮对话,多轮对话按时间顺序排列。
+curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"它有哪些功能?", "chat_history": [{"user":"PAI是什么?", "bot":"PAI是阿里云的人工智能平台,它提供一站式的机器学习解决方案。这个平台支持各种机器学习任务,包括有监督学习、无监督学习和增强学习,适用于营销、金融、社交网络等多个场景。"}]}'
+
+# 同时传入session_id和chat_history:会用chat_history对存储的session_id所对应的对话历史进行追加更新
+curl -X 'POST' http://127.0.0.1:8000/service/query -H "Content-Type: application/json" -d '{"question":"它有什么优势?", "chat_history": [{"user":"PAI是什么?", "bot":"PAI是阿里云的人工智能平台,它提供一站式的机器学习解决方案。这个平台支持各种机器学习任务,包括有监督学习、无监督学习和增强学习,适用于营销、金融、社交网络等多个场景。"}], "session_id": "1702ffxxad3xxx6fxxx97daf7c"}'
+```
+
+- Agent及调用Function Tool的简单对话
diff --git a/docs/develop/local_develop.md b/docs/develop/local_develop.md
new file mode 100644
index 00000000..0830717e
--- /dev/null
+++ b/docs/develop/local_develop.md
@@ -0,0 +1,105 @@
+For local development, please refer to the following steps:
+
+## Run in Local Environment
+
+1. Clone Repo
+
+ ```bash
+ git clone git@github.com:aigc-apps/PAI-RAG.git
+ ```
+
+2. Development Environment Settings
+
+ This project uses poetry for management. To ensure environmental consistency and avoid problems caused by Python version differences, we specify Python version 3.11.
+
+ ```bash
+ conda create -n rag_env python==3.11
+ conda activate rag_env
+ ```
+
+   If you use macOS and need to process PPTX files, use the following command to install the required dependencies:
+
+ ```bash
+ brew install mono-libgdiplus
+ ```
+
+ Use poetry to install project dependency packages directly:
+
+ ```bash
+ pip install poetry
+ poetry install
+ poetry run aliyun-bootstrap -a install
+ ```
+
+- Common network timeout issues
+
+ Note: During the installation, if you encounter a network connection timeout, you can add the Alibaba Cloud or Tsinghua mirror source and append the following lines to the end of the pyproject.toml file:
+
+ ```bash
+ [[tool.poetry.source]]
+ name = "mirrors"
+ url = "http://mirrors.aliyun.com/pypi/simple/" # Aliyun
+  # url = "https://pypi.tuna.tsinghua.edu.cn/simple/" # Tsinghua
+ priority = "default"
+ ```
+
+ After that, execute the following commands:
+
+ ```bash
+ poetry lock
+ poetry install
+ ```
+
+3. Download Models:
+
+ Download models (embedding/pdf-extractor/reranker models) using `load_model` command:
+
+ ```bash
+   # Supports --model-name (default ""); without it, all of the models mentioned above are downloaded.
+ load_model [--model-name MODEL_NAME]
+ ```
+
+4. Run RAG Service
+
+ To use the DashScope API, you need to export environment variables:
+
+ ```bash
+ export DASHSCOPE_API_KEY=""
+ ```
+
+ ```bash
+ # Support custom host (default 0.0.0.0), port (default 8001), config (default src/pai_rag/config/settings.yaml), skip-download-models (default False)
+ # Download [bge-large-zh-v1.5, easyocr] by default, you can skip it by setting --skip-download-models.
+ # you can use tool "load_model" to download other models including [bge-large-zh-v1.5, easyocr, SGPT-125M-weightedmean-nli-bitfit, bge-large-zh-v1.5, bge-m3, bge-reranker-base, bge-reranker-large, paraphrase-multilingual-MiniLM-L12-v2, qwen_1.8b, text2vec-large-chinese]
+ pai_rag serve [--host HOST] [--port PORT] [--config CONFIG_FILE] [--skip-download-models]
+ ```
+
+ ```bash
+ pai_rag serve
+ ```
+
+5. Run RAG WebUI
+
+ ```bash
+ # Supports custom host (default 0.0.0.0), port (default 8002), config (default localhost:8001)
+   pai_rag ui [--host HOST] [--port PORT] [--rag-url RAG_URL]
+ ```
+
+ You can also open http://127.0.0.1:8002/ to configure the RAG service and upload local data.
+
+6. [Optional] Local load_data tool
+
+ Apart from upload files from web ui, you can load data into knowledge base using `load_data` script
+
+ ```bash
+ load_data -c src/pai_rag/config/settings.yaml -d data_path -p pattern
+ ```
+
+ path examples:
+
+ ```
+ a. load_data -d test/example
+ b. load_data -d test/example_data/pai_document.pdf
+ c. load_data -d test/example_data -p *.pdf
+
+ ```
diff --git a/docs/develop/local_develop_zh.md b/docs/develop/local_develop_zh.md
new file mode 100644
index 00000000..b0a4d986
--- /dev/null
+++ b/docs/develop/local_develop_zh.md
@@ -0,0 +1,105 @@
+如果需要在本地进行开发运行,请参考以下步骤:
+
+## 本地启动
+
+1. 克隆仓库
+
+ ```bash
+ git clone git@github.com:aigc-apps/PAI-RAG.git
+ ```
+
+2. 配置开发环境
+
+ 本项目使用poetry进行管理,若在本地环境下使用,建议在安装环境之前先创建一个空环境。为了确保环境一致性并避免因Python版本差异造成的问题,我们指定Python版本为3.11。
+
+ ```bash
+ conda create -n rag_env python==3.11
+ conda activate rag_env
+ ```
+
+ 如果使用macOS且需要处理PPTX文件,需要下载依赖库处理PPTX文件
+
+ ```bash
+ brew install mono-libgdiplus
+ ```
+
+ 直接使用poetry安装项目依赖包:
+
+ ```bash
+ pip install poetry
+ poetry install
+ poetry run aliyun-bootstrap -a install
+ ```
+
+- 常见网络超时问题
+
+ 注:在安装过程中,若遇到网络连接超时的情况,可以添加阿里云或清华的镜像源,在 pyproject.toml 文件末尾追加以下几行:
+
+ ```bash
+ [[tool.poetry.source]]
+ name = "mirrors"
+ url = "http://mirrors.aliyun.com/pypi/simple/" # 阿里云
+ # url = "https://pypi.tuna.tsinghua.edu.cn/simple/" # 清华
+ priority = "default"
+ ```
+
+ 之后,再依次执行以下命令:
+
+ ```bash
+ poetry lock
+ poetry install
+ ```
+
+3. 下载其他模型到本地
+
+ ```bash
+ # 支持 model name (默认 ""), 没有参数时, 默认下载上述所有模型。
+ load_model [--model-name MODEL_NAME]
+ ```
+
+4. 启动RAG服务
+
+ 使用DashScope API,需要在命令行引入环境变量
+
+ ```bash
+ export DASHSCOPE_API_KEY=""
+ ```
+
+ 启动:
+
+ ```bash
+ # 启动,支持自定义host(默认0.0.0.0), port(默认8001), config(默认src/pai_rag/config/settings.yaml), skip-download-models(不加为False)
+ # 默认启动时下载模型 [bge-large-zh-v1.5, easyocr] , 可设置 skip-download-models 避免启动时下载模型.
+ # 可使用命令行 "load_model" 下载模型 including [bge-large-zh-v1.5, easyocr, SGPT-125M-weightedmean-nli-bitfit, bge-large-zh-v1.5, bge-m3, bge-reranker-base, bge-reranker-large, paraphrase-multilingual-MiniLM-L12-v2, qwen_1.8b, text2vec-large-chinese]
+ pai_rag serve [--host HOST] [--port PORT] [--config CONFIG_FILE] [--skip-download-models]
+ ```
+
+ ```bash
+ pai_rag serve
+ ```
+
+5. 启动RAG WebUI
+
+ ```bash
+ # 启动,支持自定义host(默认0.0.0.0), port(默认8002), config(默认localhost:8001)
+   pai_rag ui [--host HOST] [--port PORT] [--rag-url RAG_URL]
+ ```
+
+ 你也可以打开http://127.0.0.1:8002/ 来配置RAG服务以及上传本地数据。
+
+6. 【可选】本地工具-上传数据
+
+ 向当前索引存储中插入data_path路径下的新文件
+
+ ```bash
+ load_data -c src/pai_rag/config/settings.yaml -d data_path -p pattern
+ ```
+
+ path examples:
+
+ ```
+ a. load_data -d test/example
+ b. load_data -d test/example_data/pai_document.pdf
+ c. load_data -d test/example_data -p *.pdf
+
+ ```
diff --git a/src/pai_rag/app/api/base_router.py b/src/pai_rag/app/api/base_router.py
new file mode 100644
index 00000000..18c798a0
--- /dev/null
+++ b/src/pai_rag/app/api/base_router.py
@@ -0,0 +1,14 @@
+from fastapi import APIRouter
+from fastapi.responses import RedirectResponse
+
+router = APIRouter()
+
+
+@router.get("/")
+async def api_root():
+ return RedirectResponse(url="/docs")
+
+
+@router.get("/health")
+def health_check():
+ return {"status": "OK"}
diff --git a/src/pai_rag/app/api/service.py b/src/pai_rag/app/api/service.py
index c4c162d3..f6e3b0f6 100644
--- a/src/pai_rag/app/api/service.py
+++ b/src/pai_rag/app/api/service.py
@@ -2,6 +2,7 @@
from pai_rag.core.rag_config_manager import RagConfigManager
from pai_rag.core.rag_service import rag_service
from pai_rag.app.api import query
+from pai_rag.app.api import base_router
from pai_rag.app.api.v1.chat import router_v1
from pai_rag.app.api import agent_demo
from pai_rag.app.api.middleware import init_middleware
@@ -9,6 +10,7 @@
def init_router(app: FastAPI):
+ app.include_router(base_router.router, prefix="", tags=["base"])
app.include_router(query.router, prefix="/service", tags=["RAG"])
app.include_router(router_v1, prefix="/v1", tags=["v1"])
app.include_router(agent_demo.demo_router, tags=["AgentDemo"], prefix="/demo/api")
diff --git a/src/pai_rag/app/web/rag_client.py b/src/pai_rag/app/web/rag_client.py
index 9b11d38b..ff363b06 100644
--- a/src/pai_rag/app/web/rag_client.py
+++ b/src/pai_rag/app/web/rag_client.py
@@ -103,6 +103,10 @@ def index_url(self):
def list_index_url(self):
return f"{self.endpoint}v1/indexes"
+ @property
+ def health_check_url(self):
+ return f"{self.endpoint}health"
+
def _format_rag_response(
self, question, response, with_history: bool = False, stream: bool = False
):
@@ -199,6 +203,14 @@ def _format_rag_response(
response["result"] = formatted_answer
return response
+ def check_health(self):
+ try:
+ r = requests.get(self.health_check_url)
+ return r.status_code == HTTPStatus.OK
+ except Exception as ex:
+ logger.error(f"Check health failed: {ex}")
+ return False
+
def query(
self,
text: str,
diff --git a/src/pai_rag/app/web/webui.py b/src/pai_rag/app/web/webui.py
index 18aa9a61..3c91f34c 100644
--- a/src/pai_rag/app/web/webui.py
+++ b/src/pai_rag/app/web/webui.py
@@ -24,6 +24,15 @@
def resume_ui():
outputs = {}
+
+ if not rag_client.check_health():
+ gr.Warning(
+ "RAG service is not ready. Please check the service status and refresh later."
+ )
+ elems = elem_manager.get_elem_list()
+ outputs = {elem: gr.update() for elem in elems}
+ return outputs
+
rag_config = rag_client.get_config()
view_model = ViewModel.from_app_config(rag_config)
index_map = get_index_map()
@@ -46,7 +55,6 @@ def resume_ui():
# outputs[elem] = elem_attr["value"]
# else:
# outputs[elem] = elem.__class__(**elem_attr).value
-
return outputs