Skip to content

Commit

Permalink
Merge branch 'feature' into personal/xiaowen/bugfix_prmt
Browse files Browse the repository at this point in the history
  • Loading branch information
wwxxzz authored Dec 24, 2024
2 parents 10ba4a2 + b9ab299 commit aeed67e
Show file tree
Hide file tree
Showing 11 changed files with 1,135 additions and 8 deletions.
15 changes: 14 additions & 1 deletion docs/config_guide_cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ source = [PaiEas, OpenAI, DashScope]

## rag.index

vector_store.type = [FAISS, Hologres, ElasticSearch, AnalyticDB, Milvus]
vector_store.type = [FAISS, Hologres, ElasticSearch, AnalyticDB, Milvus, Tablestore]

目前, pai_rag 支持多种方式创建和存储索引。

Expand Down Expand Up @@ -153,6 +153,19 @@ vector_store.type = [FAISS, Hologres, ElasticSearch, AnalyticDB, Milvus]
database = "pairag"
collection = "pairag_collection"

如果 vector_store.type = "Tablestore", 需要提供如下信息:

[rag.index]
persist_path = "localdata/storage"

[rag.index.vector_store]
type = "Tablestore"
endpoint = ""
instance_name = ""
access_key_id = ""
access_key_secret = ""
table_name = "pai_rag"

该设置也可在网页中配置。

## rag.node_parser
Expand Down
15 changes: 14 additions & 1 deletion docs/config_guide_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ This setting is also available in webui.

## rag.index

vector_store.type = [FAISS, Hologres, ElasticSearch, AnalyticDB, Milvus]
vector_store.type = [FAISS, Hologres, ElasticSearch, AnalyticDB, Milvus, Tablestore]

Currently, pai_rag provides a variety of approaches for creating & storing indices.

Expand Down Expand Up @@ -154,6 +154,19 @@ If vector_store.type = "Milvus", you need to provide the following information:
database = "pairag"
collection = "pairag_collection"

If vector_store.type = "Tablestore", you need to provide the following information:

[rag.index]
persist_path = "localdata/storage"

[rag.index.vector_store]
type = "Tablestore"
endpoint = ""
instance_name = ""
access_key_id = ""
access_key_secret = ""
table_name = "pai_rag"

This setting is also available in webui.

## rag.node_parser
Expand Down
198 changes: 197 additions & 1 deletion poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ aspose-slides = "^24.10.0"
ultralytics = "8.3.43"
datasketch = "^1.6.5"
primp = "0.9.1"
tablestore = "^6.1.0"

[tool.poetry.scripts]
pai_rag = "pai_rag.main:run"
Expand Down
4 changes: 4 additions & 0 deletions src/pai_rag/app/web/event_listeners.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ def change_vectordb_conn(vectordb_type):
milvus_visible = False
opensearch_visible = False
postgresql_visible = False
tablestore_visible = False
if vectordb_type.lower() == "analyticdb":
adb_visible = True
elif vectordb_type.lower() == "hologres":
Expand All @@ -175,6 +176,8 @@ def change_vectordb_conn(vectordb_type):
opensearch_visible = True
elif vectordb_type.lower() == "postgresql":
postgresql_visible = True
elif vectordb_type.lower() == "tablestore":
tablestore_visible = True

return [
gr.update(visible=adb_visible),
Expand All @@ -184,6 +187,7 @@ def change_vectordb_conn(vectordb_type):
gr.update(visible=milvus_visible),
gr.update(visible=opensearch_visible),
gr.update(visible=postgresql_visible),
gr.update(visible=tablestore_visible),
]


Expand Down
40 changes: 40 additions & 0 deletions src/pai_rag/app/web/index_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
MilvusVectorStoreConfig,
OpenSearchVectorStoreConfig,
PostgreSQLVectorStoreConfig,
TablestoreVectorStoreConfig,
)


Expand Down Expand Up @@ -72,6 +73,11 @@
"postgresql_username",
"postgresql_password",
"postgresql_table_name",
"tablestore_endpoint",
"tablestore_instance_name",
"tablestore_access_key_id",
"tablestore_access_key_secret",
"tablestore_table_name",
]


Expand Down Expand Up @@ -292,6 +298,26 @@ def index_to_components_settings(
{"value": ""},
]
)
if isinstance(vector_store_config, TablestoreVectorStoreConfig):
vector_component_settings.extend(
[
{"value": vector_store_config.endpoint},
{"value": vector_store_config.instance_name},
{"value": vector_store_config.access_key_id},
{"value": vector_store_config.access_key_secret},
{"value": vector_store_config.table_name},
]
)
else:
vector_component_settings.extend(
[
{"value": ""},
{"value": ""},
{"value": ""},
{"value": ""},
{"value": ""},
]
)
component_settings = [
*index_component_settings,
*embed_component_settings,
Expand Down Expand Up @@ -359,6 +385,11 @@ def components_to_index(
milvus_password,
milvus_database,
milvus_collection_name,
tablestore_endpoint,
tablestore_instance_name,
tablestore_access_key_id,
tablestore_access_key_secret,
tablestore_table_name,
**kwargs,
) -> RagIndexEntry:
if vector_index is None or vector_index.lower() == "new":
Expand Down Expand Up @@ -443,6 +474,15 @@ def components_to_index(
"username": postgresql_username,
"password": postgresql_password,
}
elif vectordb_type.lower() == "tablestore":
vector_store = {
"type": vectordb_type.lower(),
"endpoint": tablestore_endpoint,
"instance_name": tablestore_instance_name,
"access_key_id": tablestore_access_key_id,
"access_key_secret": tablestore_access_key_secret,
"table_name": tablestore_table_name,
}
else:
raise ValueError(f"Unknown vector db type: {vectordb_type}")

Expand Down
39 changes: 39 additions & 0 deletions src/pai_rag/app/web/tabs/vector_db_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def create_vector_db_panel() -> Dict[str, Any]:
"faiss",
"opensearch",
"postgresql",
"tablestore",
],
label="Which VectorStore do you want to use?",
elem_id="vectordb_type",
Expand Down Expand Up @@ -232,6 +233,37 @@ def create_vector_db_panel() -> Dict[str, Any]:
interactive=True,
)

with gr.Column(visible=(vectordb_type == "tablestore")) as tablestore_col:
with gr.Row():
tablestore_endpoint = gr.Textbox(
label="tablestore_endpoint",
elem_id="tablestore_endpoint",
interactive=True,
)
tablestore_instance_name = gr.Textbox(
label="tablestore_instance_name",
elem_id="tablestore_instance_name",
interactive=True,
)
with gr.Row():
tablestore_access_key_id = gr.Textbox(
label="tablestore_access_key_id",
elem_id="tablestore_access_key_id",
interactive=True,
)
tablestore_access_key_secret = gr.Textbox(
label="tablestore_access_key_secret",
type="password",
elem_id="tablestore_access_key_secret",
interactive=True,
)
with gr.Row():
tablestore_table_name = gr.Textbox(
label="tablestore_table_name",
elem_id="tablestore_table_name",
interactive=True,
)

vectordb_type.change(
fn=ev_listeners.change_vectordb_conn,
inputs=vectordb_type,
Expand All @@ -243,6 +275,7 @@ def create_vector_db_panel() -> Dict[str, Any]:
milvus_col,
opensearch_col,
postgresql_col,
tablestore_col,
],
)
db_related_elements = [
Expand Down Expand Up @@ -291,6 +324,12 @@ def create_vector_db_panel() -> Dict[str, Any]:
adb_account,
adb_account_password,
adb_namespace,
# tablestore
tablestore_endpoint,
tablestore_instance_name,
tablestore_access_key_id,
tablestore_access_key_secret,
tablestore_table_name,
]
components.extend(db_related_elements)
return db_related_elements, components_to_dict(components)
87 changes: 87 additions & 0 deletions src/pai_rag/integrations/index/pai/utils/vector_store_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
import faiss
import os
import json
import tablestore
from llama_index.core.vector_stores.simple import DEFAULT_VECTOR_STORE, NAMESPACE_SEP
from llama_index.core.vector_stores.types import DEFAULT_PERSIST_FNAME
from elasticsearch.helpers.vectorstore import AsyncDenseVectorStrategy
from pai_rag.integrations.index.pai.utils.sparse_embed_function import (
BGEM3SparseEmbeddingFunction,
)
from pai_rag.integrations.vector_stores.tablestore.tablestore import (
TablestoreVectorStore,
)
from pai_rag.integrations.vector_stores.hologres.hologres import HologresVectorStore
from pai_rag.integrations.vector_stores.elasticsearch.my_elasticsearch import (
MyElasticsearchStore,
Expand All @@ -27,6 +31,7 @@
ElasticSearchVectorStoreConfig,
OpenSearchVectorStoreConfig,
HologresVectorStoreConfig,
TablestoreVectorStoreConfig,
)


Expand Down Expand Up @@ -57,6 +62,8 @@ def create_vector_store(
create_vector_store_func = create_postgresql
elif isinstance(vectordb_config, OpenSearchVectorStoreConfig):
create_vector_store_func = create_opensearch
elif isinstance(vectordb_config, TablestoreVectorStoreConfig):
create_vector_store_func = create_tablestore
else:
raise ValueError(f"Unknown vector store config {vectordb_config}.")

Expand Down Expand Up @@ -236,6 +243,86 @@ def create_opensearch(
return opensearch_store


def create_tablestore(
    tablestore_config: TablestoreVectorStoreConfig,
    embed_dims: int,
    is_image_store: bool = False,
):
    """Build a TablestoreVectorStore from config and ensure its table and index exist.

    Args:
        tablestore_config: Connection settings (endpoint, instance name,
            access key pair, base table name).
        embed_dims: Passed to the store as ``vector_dimension``.
        is_image_store: When true, ``"__image"`` is appended to the configured
            table name so the image store uses a separate table.

    Returns:
        The constructed ``TablestoreVectorStore``, after
        ``create_table_if_not_exist()`` and
        ``create_search_index_if_not_exist()`` have been called on it.
    """
    base_table = tablestore_config.table_name
    target_table = f"{base_table}__image" if is_image_store else base_table

    # Formats accepted for the two DATE metadata fields.
    date_patterns = (
        "yyyy-MM-dd",
        "yyyy-MM-dd HH:mm",
        "yyyy-MM-dd HH:mm:ss",
        "yyyy-MM-dd HH:mm:ss.SSS",
    )

    # (field name, field type, enable_sort_and_agg, carries date_formats)
    field_specs = (
        ("file_name", tablestore.FieldType.KEYWORD, True, False),
        ("file_type", tablestore.FieldType.KEYWORD, True, False),
        ("file_size", tablestore.FieldType.LONG, True, False),
        ("file_path", tablestore.FieldType.TEXT, False, False),
        ("image_url", tablestore.FieldType.TEXT, False, False),
        ("creation_date", tablestore.FieldType.DATE, True, True),
        ("last_modified_date", tablestore.FieldType.DATE, True, True),
    )

    # metadata mapping is used to filter non-vector fields.
    metadata_fields = []
    for field_name, field_type, sortable, has_date_formats in field_specs:
        # Each DATE field gets its own list so the schemas never share state.
        extra_kwargs = (
            {"date_formats": list(date_patterns)} if has_date_formats else {}
        )
        metadata_fields.append(
            tablestore.FieldSchema(
                field_name,
                field_type,
                index=True,
                enable_sort_and_agg=sortable,
                **extra_kwargs,
            )
        )

    store = TablestoreVectorStore(
        endpoint=tablestore_config.endpoint,
        instance_name=tablestore_config.instance_name,
        access_key_id=tablestore_config.access_key_id,
        access_key_secret=tablestore_config.access_key_secret,
        table_name=target_table,
        index_name="pai_rag_vector_store_ots_index_v1",
        vector_dimension=embed_dims,
        metadata_mappings=metadata_fields,
    )
    store.create_table_if_not_exist()
    store.create_search_index_if_not_exist()
    return store


def create_postgresql(
pg_config: PostgreSQLVectorStoreConfig,
embed_dims: int,
Expand Down
13 changes: 13 additions & 0 deletions src/pai_rag/integrations/index/pai/vector_store_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class SupportedVectorStoreType(str, Enum):
opensearch = "opensearch"
milvus = "milvus"
hologres = "hologres"
tablestore = "tablestore"


class VectorIndexRetrievalType(str, Enum):
Expand All @@ -25,6 +26,7 @@ class VectorIndexRetrievalType(str, Enum):
SupportedVectorStoreType.elasticsearch,
SupportedVectorStoreType.postgresql,
SupportedVectorStoreType.milvus,
SupportedVectorStoreType.tablestore,
]


Expand Down Expand Up @@ -99,6 +101,17 @@ class OpenSearchVectorStoreConfig(BaseVectorStoreConfig):
table_name: str


class TablestoreVectorStoreConfig(BaseVectorStoreConfig):
    """Settings for a vector store of type ``tablestore``.

    All connection fields are required strings; ``type`` is fixed to
    ``SupportedVectorStoreType.tablestore`` and defaults to it.
    """

    # Fixed type tag; only the tablestore enum member is accepted.
    type: Literal[
        SupportedVectorStoreType.tablestore
    ] = SupportedVectorStoreType.tablestore
    # Service endpoint of the Tablestore instance.
    endpoint: str
    # Name of the Tablestore instance.
    instance_name: str
    # Access key pair used to authenticate against the instance.
    access_key_id: str
    access_key_secret: str
    # Base name of the table that holds the vectors.
    table_name: str


class PostgreSQLVectorStoreConfig(BaseVectorStoreConfig):
type: Literal[
SupportedVectorStoreType.postgresql
Expand Down
Loading

0 comments on commit aeed67e

Please sign in to comment.