From ebdb7426314df2441dcfc0e6be48de096be11e2c Mon Sep 17 00:00:00 2001 From: Ceceliachenen Date: Mon, 23 Dec 2024 17:32:26 +0800 Subject: [PATCH 1/2] fix load oss files (#315) * fix load oss files * fix oss loader --------- Co-authored-by: Yue Fei --- src/pai_rag/utils/oss_client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pai_rag/utils/oss_client.py b/src/pai_rag/utils/oss_client.py index d0f280c7..accef7b6 100644 --- a/src/pai_rag/utils/oss_client.py +++ b/src/pai_rag/utils/oss_client.py @@ -69,7 +69,6 @@ def list_objects(self, prefix: str): """ 列出存储桶中指定前缀的对象列表。 - 该方法通过调用oss bucket的list_objects函数,查询与给定前缀匹配的所有对象,并返回这些对象的列表。 参数: - prefix (str): 对象名的前缀,用于筛选满足条件的对象。默认为空字符串,表示不指定前缀,即列出所有对象。 @@ -77,10 +76,11 @@ def list_objects(self, prefix: str): 返回: - list: 包含满足前缀条件的所有对象的列表。 """ - # 调用bucket的list_objects方法,传入前缀参数 - res = self.bucket.list_objects(prefix=prefix) - # 返回查询到的对象列表 - return res.object_list + object_list = [] + for obj in oss2.ObjectIteratorV2(self.bucket, prefix): + logger.info("file: " + obj.key) + object_list.append(obj) + return object_list def put_object_acl(self, key, permission): if key.endswith(".txt"): From b9ab29953e749c3bd3ec0e225e358a137f8b6b52 Mon Sep 17 00:00:00 2001 From: ScriptShi Date: Tue, 24 Dec 2024 10:03:27 +0800 Subject: [PATCH 2/2] Add tablestore vector store. (#312) * Add tablestore vector store. * add tablestore --------- Co-authored-by: Yue Fei --- docs/config_guide_cn.md | 15 +- docs/config_guide_en.md | 15 +- poetry.lock | 198 ++++- pyproject.toml | 1 + src/pai_rag/app/web/event_listeners.py | 4 + src/pai_rag/app/web/index_utils.py | 40 + src/pai_rag/app/web/tabs/vector_db_panel.py | 39 + .../index/pai/utils/vector_store_utils.py | 87 +++ .../index/pai/vector_store_config.py | 13 + .../vector_stores/tablestore/tablestore.py | 721 ++++++++++++++++++ 10 files changed, 1130 insertions(+), 3 deletions(-) create mode 100644 src/pai_rag/integrations/vector_stores/tablestore/tablestore.py diff --git a/docs/config_guide_cn.md b/docs/config_guide_cn.md index d7b6d15b..6ce27be4 100644 --- a/docs/config_guide_cn.md +++ b/docs/config_guide_cn.md @@ -87,7 +87,7 @@ source = [PaiEas, OpenAI, DashScope] ## rag.index -vector_store.type = [FAISS, Hologres, ElasticSearch, AnalyticDB, Milvus] +vector_store.type = [FAISS, Hologres, ElasticSearch, AnalyticDB, Milvus, Tablestore] 目前, pai_rag 支持多种方式创建和存储索引。 @@ -153,6 +153,19 @@ vector_store.type = [FAISS, Hologres, ElasticSearch, AnalyticDB, Milvus] database = "pairag" collection = "pairag_collection" +如果 vector_store.type = "Tablestore", 需要提供如下信息: + + [rag.index] + persist_path = "localdata/storage" + + [rag.index.vector_store] + type = "Tablestore" + endpoint = "" + instance_name = "" + access_key_id = "" + access_key_secret = "" + table_name = "pai_rag" + 该设置也可在网页中配置。 ## rag.node_parser diff --git a/docs/config_guide_en.md b/docs/config_guide_en.md index bc660ead..28dc4909 100644 --- a/docs/config_guide_en.md +++ b/docs/config_guide_en.md @@ -88,7 +88,7 @@ This setting is also available in webui. ## rag.index -vector_store.type = [FAISS, Hologres, ElasticSearch, AnalyticDB, Milvus] +vector_store.type = [FAISS, Hologres, ElasticSearch, AnalyticDB, Milvus, Tablestore] Currently, pai_rag provides a variety of approaches for creating & storing indices. @@ -154,6 +154,19 @@ If vector_store.type = "Milvus", you need to provide the following information: database = "pairag" collection = "pairag_collection" +If vector_store.type = "Tablestore", you need to provide the following information: + + [rag.index] + persist_path = "localdata/storage" + + [rag.index.vector_store] + type = "Tablestore" + endpoint = "" + instance_name = "" + access_key_id = "" + access_key_secret = "" + table_name = "pai_rag" + This setting is also available in webui. ## rag.node_parser diff --git a/poetry.lock b/poetry.lock index 257c8345..cd5df14e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -906,6 +906,10 @@ files = [ {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a37b8f0391212d29b3a91a799c8e4a2855e0576911cdfb2515487e30e322253d"}, {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e84799f09591700a4154154cab9787452925578841a94321d5ee8fb9a9a328f0"}, {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f66b5337fa213f1da0d9000bc8dc0cb5b896b726eefd9c6046f699b169c41b9e"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5dab0844f2cf82be357a0eb11a9087f70c5430b2c241493fc122bb6f2bb0917c"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e4fe605b917c70283db7dfe5ada75e04561479075761a0b3866c081d035b01c1"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1e9a65b5736232e7a7f91ff3d02277f11d339bf34099a56cdab6a8b3410a02b2"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:58d4b711689366d4a03ac7957ab8c28890415e267f9b6589969e74b6e42225ec"}, {file = "Brotli-1.1.0-cp310-cp310-win32.whl", hash = "sha256:be36e3d172dc816333f33520154d708a2657ea63762ec16b62ece02ab5e4daf2"}, {file = "Brotli-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c6244521dda65ea562d5a69b9a26120769b7a9fb3db2fe9545935ed6735b128"}, {file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc"}, @@ -918,8 +922,14 @@ files = [ {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9"}, {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265"}, {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c247dd99d39e0338a604f8c2b3bc7061d5c2e9e2ac7ba9cc1be5a69cb6cd832f"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1b2c248cd517c222d89e74669a4adfa5577e06ab68771a529060cf5a156e9757"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2a24c50840d89ded6c9a8fdc7b6ed3692ed4e86f1c4a4a938e1e92def92933e0"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f31859074d57b4639318523d6ffdca586ace54271a73ad23ad021acd807eb14b"}, {file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50"}, {file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:32d95b80260d79926f5fab3c41701dbb818fde1c9da590e77e571eefd14abe28"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b760c65308ff1e462f65d69c12e4ae085cff3b332d894637f6273a12a482d09f"}, {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409"}, {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2"}, {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451"}, @@ -930,8 +940,24 @@ files = [ {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180"}, {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248"}, {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:87a3044c3a35055527ac75e419dfa9f4f3667a1e887ee80360589eb8c90aabb9"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c5529b34c1c9d937168297f2c1fde7ebe9ebdd5e121297ff9c043bdb2ae3d6fb"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ca63e1890ede90b2e4454f9a65135a4d387a4585ff8282bb72964fab893f2111"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e79e6520141d792237c70bcd7a3b122d00f2613769ae0cb61c52e89fd3443839"}, {file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0"}, {file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951"}, + {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8bf32b98b75c13ec7cf774164172683d6e7891088f6316e54425fde1efc276d5"}, + {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bc37c4d6b87fb1017ea28c9508b36bbcb0c3d18b4260fcdf08b200c74a6aee8"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c0ef38c7a7014ffac184db9e04debe495d317cc9c6fb10071f7fefd93100a4f"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91d7cc2a76b5567591d12c01f019dd7afce6ba8cba6571187e21e2fc418ae648"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93dde851926f4f2678e704fadeb39e16c35d8baebd5252c9fd94ce8ce68c4a0"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0db75f47be8b8abc8d9e31bc7aad0547ca26f24a54e6fd10231d623f183d089"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6967ced6730aed543b8673008b5a391c3b1076d834ca438bbd70635c73775368"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7eedaa5d036d9336c95915035fb57422054014ebdeb6f3b42eac809928e40d0c"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d487f5432bf35b60ed625d7e1b448e2dc855422e87469e3f450aa5552b0eb284"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832436e59afb93e1836081a20f324cb185836c617659b07b129141a8426973c7"}, + {file = "Brotli-1.1.0-cp313-cp313-win32.whl", hash = "sha256:43395e90523f9c23a3d5bdf004733246fba087f2948f87ab28015f12359ca6a0"}, + {file = "Brotli-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9011560a466d2eb3f5a6e4929cf4a09be405c64154e12df0dd72713f6500e32b"}, {file = "Brotli-1.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a090ca607cbb6a34b0391776f0cb48062081f5f60ddcce5d11838e67a01928d1"}, {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de9d02f5bda03d27ede52e8cfe7b865b066fa49258cbab568720aa5be80a47d"}, {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2333e30a5e00fe0fe55903c8832e08ee9c3b1382aacf4db26664a16528d51b4b"}, @@ -941,6 +967,10 @@ files = [ {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:fd5f17ff8f14003595ab414e45fce13d073e0762394f957182e69035c9f3d7c2"}, {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:069a121ac97412d1fe506da790b3e69f52254b9df4eb665cd42460c837193354"}, {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e93dfc1a1165e385cc8239fab7c036fb2cd8093728cbd85097b284d7b99249a2"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:aea440a510e14e818e67bfc4027880e2fb500c2ccb20ab21c7a7c8b5b4703d75"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_i686.whl", hash = "sha256:6974f52a02321b36847cd19d1b8e381bf39939c21efd6ee2fc13a28b0d99348c"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_ppc64le.whl", hash = "sha256:a7e53012d2853a07a4a79c00643832161a910674a893d296c9f1259859a289d2"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:d7702622a8b40c49bffb46e1e3ba2e81268d5c04a34f460978c6b5517a34dd52"}, {file = "Brotli-1.1.0-cp36-cp36m-win32.whl", hash = "sha256:a599669fd7c47233438a56936988a2478685e74854088ef5293802123b5b2460"}, {file = "Brotli-1.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d143fd47fad1db3d7c27a1b1d66162e855b5d50a89666af46e1679c496e8e579"}, {file = "Brotli-1.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:11d00ed0a83fa22d29bc6b64ef636c4552ebafcef57154b4ddd132f5638fbd1c"}, @@ -952,6 +982,10 @@ files = [ {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:919e32f147ae93a09fe064d77d5ebf4e35502a8df75c29fb05788528e330fe74"}, {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:23032ae55523cc7bccb4f6a0bf368cd25ad9bcdcc1990b64a647e7bbcce9cb5b"}, {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:224e57f6eac61cc449f498cc5f0e1725ba2071a3d4f48d5d9dffba42db196438"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cb1dac1770878ade83f2ccdf7d25e494f05c9165f5246b46a621cc849341dc01"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:3ee8a80d67a4334482d9712b8e83ca6b1d9bc7e351931252ebef5d8f7335a547"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:5e55da2c8724191e5b557f8e18943b1b4839b8efc3ef60d65985bcf6f587dd38"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:d342778ef319e1026af243ed0a07c97acf3bad33b9f29e7ae6a1f68fd083e90c"}, {file = "Brotli-1.1.0-cp37-cp37m-win32.whl", hash = "sha256:587ca6d3cef6e4e868102672d3bd9dc9698c309ba56d41c2b9c85bbb903cdb95"}, {file = "Brotli-1.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2954c1c23f81c2eaf0b0717d9380bd348578a94161a65b3a2afc62c86467dd68"}, {file = "Brotli-1.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:efa8b278894b14d6da122a72fefcebc28445f2d3f880ac59d46c90f4c13be9a3"}, @@ -964,6 +998,10 @@ files = [ {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ab4fbee0b2d9098c74f3057b2bc055a8bd92ccf02f65944a241b4349229185a"}, {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:141bd4d93984070e097521ed07e2575b46f817d08f9fa42b16b9b5f27b5ac088"}, {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fce1473f3ccc4187f75b4690cfc922628aed4d3dd013d047f95a9b3919a86596"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d2b35ca2c7f81d173d2fadc2f4f31e88cc5f7a39ae5b6db5513cf3383b0e0ec7"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:af6fa6817889314555aede9a919612b23739395ce767fe7fcbea9a80bf140fe5"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:2feb1d960f760a575dbc5ab3b1c00504b24caaf6986e2dc2b01c09c87866a943"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4410f84b33374409552ac9b6903507cdb31cd30d2501fc5ca13d18f73548444a"}, {file = "Brotli-1.1.0-cp38-cp38-win32.whl", hash = "sha256:db85ecf4e609a48f4b29055f1e144231b90edc90af7481aa731ba2d059226b1b"}, {file = "Brotli-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3d7954194c36e304e1523f55d7042c59dc53ec20dd4e9ea9d151f1b62b4415c0"}, {file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5fb2ce4b8045c78ebbc7b8f3c15062e435d47e7393cc57c25115cfd49883747a"}, @@ -976,6 +1014,10 @@ files = [ {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:949f3b7c29912693cee0afcf09acd6ebc04c57af949d9bf77d6101ebb61e388c"}, {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:89f4988c7203739d48c6f806f1e87a1d96e0806d44f0fba61dba81392c9e474d"}, {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:de6551e370ef19f8de1807d0a9aa2cdfdce2e85ce88b122fe9f6b2b076837e59"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0737ddb3068957cf1b054899b0883830bb1fec522ec76b1098f9b6e0f02d9419"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4f3607b129417e111e30637af1b56f24f7a49e64763253bbc275c75fa887d4b2"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:6c6e0c425f22c1c719c42670d561ad682f7bfeeef918edea971a79ac5252437f"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:494994f807ba0b92092a163a0a283961369a65f6cbe01e8891132b7a320e61eb"}, {file = "Brotli-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f0d8a7a6b5983c2496e364b969f0e526647a06b075d034f3297dc66f3b360c64"}, {file = "Brotli-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdad5b9014d83ca68c25d2e9444e28e967ef16e80f6b436918c700c117a85467"}, {file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"}, @@ -1498,6 +1540,122 @@ files = [ [package.extras] toml = ["tomli"] +[[package]] +name = "crc32c" +version = "2.7.1" +description = "A python package implementing the crc32c algorithm in hardware and software" +optional = false +python-versions = ">=3.7" +files = [ + {file = "crc32c-2.7.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1fd1f9c6b50d7357736676278a1b8c8986737b8a1c76d7eab4baa71d0b6af67f"}, + {file = "crc32c-2.7.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:805c2be1bc0e251c48439a62b0422385899c15289483692bc70e78473c1039f1"}, + {file = "crc32c-2.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f4333e62b7844dfde112dbb8489fd2970358eddc3310db21e943a9f6994df749"}, + {file = "crc32c-2.7.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f0fadc741e79dc705e2d9ee967473e8a061d26b04310ed739f1ee292f33674f"}, + {file = "crc32c-2.7.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91ced31055d26d59385d708bbd36689e1a1d604d4b0ceb26767eb5a83156f85d"}, + {file = "crc32c-2.7.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36ffa999b72e3c17f6a066ae9e970b40f8c65f38716e436c39a33b809bc6ed9f"}, + {file = "crc32c-2.7.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e80114dd7f462297e54d5da1b9ff472e5249c5a2b406aa51c371bb0edcbf76bd"}, + {file = "crc32c-2.7.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:676f5b46da268b5190f9fb91b3f037a00d114b411313664438525db876adc71f"}, + {file = "crc32c-2.7.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8d0e660c9ed269e90692993a4457a932fc22c9cc96caf79dd1f1a84da85bb312"}, + {file = "crc32c-2.7.1-cp310-cp310-win32.whl", hash = "sha256:17a2c3f8c6d85b04b5511af827b5dbbda4e672d188c0b9f20a8156e93a1aa7b6"}, + {file = "crc32c-2.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:3208764c29688f91a35392073229975dd7687b6cb9f76b919dae442cabcd5126"}, + {file = "crc32c-2.7.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:19e03a50545a3ef400bd41667d5525f71030488629c57d819e2dd45064f16192"}, + {file = "crc32c-2.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8c03286b1e5ce9bed7090084f206aacd87c5146b4b10de56fe9e86cbbbf851cf"}, + {file = "crc32c-2.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:80ebbf144a1a56a532b353e81fa0f3edca4f4baa1bf92b1dde2c663a32bb6a15"}, + {file = "crc32c-2.7.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96b794fd11945298fdd5eb1290a812efb497c14bc42592c5c992ca077458eeba"}, + {file = "crc32c-2.7.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9df7194dd3c0efb5a21f5d70595b7a8b4fd9921fbbd597d6d8e7a11eca3e2d27"}, + {file = "crc32c-2.7.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d698eec444b18e296a104d0b9bb6c596c38bdcb79d24eba49604636e9d747305"}, + {file = "crc32c-2.7.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e07cf10ef852d219d179333fd706d1c415626f1f05e60bd75acf0143a4d8b225"}, + {file = "crc32c-2.7.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d2a051f296e6e92e13efee3b41db388931cdb4a2800656cd1ed1d9fe4f13a086"}, + {file = "crc32c-2.7.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1738259802978cdf428f74156175da6a5fdfb7256f647fdc0c9de1bc6cd7173"}, + {file = "crc32c-2.7.1-cp311-cp311-win32.whl", hash = "sha256:f7786d219a1a1bf27d0aa1869821d11a6f8e90415cfffc1e37791690d4a848a1"}, + {file = "crc32c-2.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:887f6844bb3ad35f0778cd10793ad217f7123a5422e40041231b8c4c7329649d"}, + {file = "crc32c-2.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f7d1c4e761fe42bf856130daf8b2658df33fe0ced3c43dadafdfeaa42b57b950"}, + {file = "crc32c-2.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:73361c79a6e4605204457f19fda18b042a94508a52e53d10a4239da5fb0f6a34"}, + {file = "crc32c-2.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:afd778fc8ac0ed2ffbfb122a9aa6a0e409a8019b894a1799cda12c01534493e0"}, + {file = "crc32c-2.7.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56ef661b34e9f25991fface7f9ad85e81bbc1b3fe3b916fd58c893eabe2fa0b8"}, + {file = "crc32c-2.7.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:571aa4429444b5d7f588e4377663592145d2d25eb1635abb530f1281794fc7c9"}, + {file = "crc32c-2.7.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c02a3bd67dea95cdb25844aaf44ca2e1b0c1fd70b287ad08c874a95ef4bb38db"}, + {file = "crc32c-2.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:99d17637c4867672cb8adeea007294e3c3df9d43964369516cfe2c1f47ce500a"}, + {file = "crc32c-2.7.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f4a400ac3c69a32e180d8753fd7ec7bccb80ade7ab0812855dce8a208e72495f"}, + {file = "crc32c-2.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:588587772e55624dd9c7a906ec9e8773ae0b6ac5e270fc0bc84ee2758eba90d5"}, + {file = "crc32c-2.7.1-cp312-cp312-win32.whl", hash = "sha256:9f14b60e5a14206e8173dd617fa0c4df35e098a305594082f930dae5488da428"}, + {file = "crc32c-2.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:7c810a246660a24dc818047dc5f89c7ce7b2814e1e08a8e99993f4103f7219e8"}, + {file = "crc32c-2.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:24949bffb06fc411cc18188d33357923cb935273642164d0bb37a5f375654169"}, + {file = "crc32c-2.7.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2d5d326e7e118d4fa60187770d86b66af2fdfc63ce9eeb265f0d3e7d49bebe0b"}, + {file = "crc32c-2.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ba110df60c64c8e2d77a9425b982a520ccdb7abe42f06604f4d98a45bb1fff62"}, + {file = "crc32c-2.7.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c277f9d16a3283e064d54854af0976b72abaa89824955579b2b3f37444f89aae"}, + {file = "crc32c-2.7.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:881af0478a01331244e27197356929edbdeaef6a9f81b5c6bacfea18d2139289"}, + {file = "crc32c-2.7.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:724d5ff4d29ff093a983ae656be3307093706d850ea2a233bf29fcacc335d945"}, + {file = "crc32c-2.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b2416c4d88696ac322632555c0f81ab35e15f154bc96055da6cf110d642dbc10"}, + {file = "crc32c-2.7.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:60254251b88ec9b9795215f0f9ec015a6b5eef8b2c5fba1267c672d83c78fc02"}, + {file = "crc32c-2.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:edefc0e46f3c37372183f70338e5bdee42f6789b62fcd36ec53aa933e9dfbeaf"}, + {file = "crc32c-2.7.1-cp313-cp313-win32.whl", hash = "sha256:813af8111218970fe2adb833c5e5239f091b9c9e76f03b4dd91aaba86e99b499"}, + {file = "crc32c-2.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:7d9ede7be8e4ec1c9e90aaf6884decbeef10e3473e6ddac032706d710cab5888"}, + {file = "crc32c-2.7.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:db9ac92294284b22521356715784b91cc9094eee42a5282ab281b872510d1831"}, + {file = "crc32c-2.7.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8fcd7f2f29a30dc92af64a9ee3d38bde0c82bd20ad939999427aac94bbd87373"}, + {file = "crc32c-2.7.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5c056ef043393085523e149276a7ce0cb534b872e04f3e20d74d9a94a75c0ad7"}, + {file = "crc32c-2.7.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03a92551a343702629af91f78d205801219692b6909f8fa126b830e332bfb0e0"}, + {file = "crc32c-2.7.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb9424ec1a8ca54763155a703e763bcede82e6569fe94762614bb2de1412d4e1"}, + {file = "crc32c-2.7.1-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88732070f6175530db04e0bb36880ac45c33d49f8ac43fa0e50cfb1830049d23"}, + {file = "crc32c-2.7.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:57a20dfc27995f568f64775eea2bbb58ae269f1a1144561df5e4a4955f79db32"}, + {file = "crc32c-2.7.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f7186d098bfd2cff25eac6880b7c7ad80431b90610036131c1c7dd0eab42a332"}, + {file = "crc32c-2.7.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:55a77e29a265418fa34bef15bd0f2c60afae5348988aaf35ed163b4bbf93cf37"}, + {file = "crc32c-2.7.1-cp313-cp313t-win32.whl", hash = "sha256:ae38a4b6aa361595d81cab441405fbee905c72273e80a1c010fb878ae77ac769"}, + {file = "crc32c-2.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:eee2a43b663feb6c79a6c1c6e5eae339c2b72cfac31ee54ec0209fa736cf7ee5"}, + {file = "crc32c-2.7.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:04a56e9f4995559fa86bcf5d0ed5c48505a36e2be1c41d70cae5c080d9a00b74"}, + {file = "crc32c-2.7.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88c5c9c21cd9fff593bb7dfe97d3287438c8aecbcc73d227f2366860a0663521"}, + {file = "crc32c-2.7.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:595146cb94ba0055301d273113add2af5859b467db41b50367f47870c2d0a81c"}, + {file = "crc32c-2.7.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b9f3792872f1320961f33aaf0198edea371aee393bcc221fab66d10ecffd77d"}, + {file = "crc32c-2.7.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:999a40d75cd1696e779f6f99c29fa52be777197d1d9e3ae69cb919a05a369c1e"}, + {file = "crc32c-2.7.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:eff485526172cee7e6d1fa9c23913f92c7d38ab05674b0b578767c7b693faf5d"}, + {file = "crc32c-2.7.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:541dac90c64ed9ce05f85a71066567e854c1b40743a01d83fa2c66419a2e97b6"}, + {file = "crc32c-2.7.1-cp37-cp37m-win32.whl", hash = "sha256:7138ec26e79100c4cf4294ef40027a1cff26a1e23b7e5eb70efe5d7ff37cbc66"}, + {file = "crc32c-2.7.1-cp37-cp37m-win_amd64.whl", hash = "sha256:35a3ed12ac2e2551a07d246b7e6512ac39db021e006205a40c1cfd32ea73fcc3"}, + {file = "crc32c-2.7.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:af062f11aea283b7e9c95f3a97fb6bb96ac08a9063f71621c2140237df141ada"}, + {file = "crc32c-2.7.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8f25ca521ecf7cccfff0ecae4d0538b5c0c7235d27bf098241f3e2bf86aed713"}, + {file = "crc32c-2.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1410bcd909be36ccbf8a52c45e4bddca77adfd4e80789ac3cd575c024086516d"}, + {file = "crc32c-2.7.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33fc8cb32f82685ebefd078e740925ea9da37a008ed5f43b68fc8324f8ca4a37"}, + {file = "crc32c-2.7.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad3dc6283ce53ad7d1dc5775003460110ab7eebf348efebe0486a531b28f8184"}, + {file = "crc32c-2.7.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:758ead20e122496764ae50db26bb90fb47fc4b6d242c8e99e87c3f1dae1f1dce"}, + {file = "crc32c-2.7.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:e436d9044bbd51936f7aeb8b322543c516bf22371a17970a370a10af1661fa54"}, + {file = "crc32c-2.7.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:47e5be99057264b603e3cd88cf091985f33c16d3c8609f1c83ed6e72ec4179b4"}, + {file = "crc32c-2.7.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:280509210e622a236f16f031856847fd0d6704df662d7209da819ccfb40c6167"}, + {file = "crc32c-2.7.1-cp38-cp38-win32.whl", hash = "sha256:4ab48e048cfa123a9f9bdc5d4d687a3461723132c749c721a6d358605e6d470d"}, + {file = "crc32c-2.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:65471d1b1b6e10a404ca8200a4271d5bc0a552c3f5dcd943c1c7835f766ea02d"}, + {file = "crc32c-2.7.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:39ca842586084bca24f9c4ab43e2d99191b1186b2f89b2122b470d0730254d1b"}, + {file = "crc32c-2.7.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a911abc33d453b3f171a3200b1e18b3fc39c204670b5b0a353cca99e4c664332"}, + {file = "crc32c-2.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:22a72e81ec08a7ece6a35ac68d1ed32dd4a8be7949b164db88d4b4a4bade5c5a"}, + {file = "crc32c-2.7.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54d6f8c5be6815eabd6e3e90fa0bc13045183a6aa33a30dd684eb0f062b92213"}, + {file = "crc32c-2.7.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c855726d71dee7ae25f81c6b54293455fc66802f34d334d22bea1f6ce8bc21c"}, + {file = "crc32c-2.7.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98d5f7fc364bb9c4c4123d149406fbee063f2e8c2cff19a12f13e50faa146237"}, + {file = "crc32c-2.7.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:51ffba582c95a281e5a3f71eacdafc96b9a1835ddae245385639458fff197034"}, + {file = "crc32c-2.7.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3950d3c340c9d70889630ef81fba8666abfd0cf0aa19fd9c3a55634e0b383b0f"}, + {file = "crc32c-2.7.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:522fba1770aad8f7eb189f21fca591a51d96dcc749859088f462281324aec30b"}, + {file = "crc32c-2.7.1-cp39-cp39-win32.whl", hash = "sha256:812723e222b6a9fe0562554d72f4f072c3a95720c60ee500984e7d0e568caac3"}, + {file = "crc32c-2.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:6793fcfe9d4130230d196abbe4021c01ffe8e85c92633bf3c8559f9836c227f5"}, + {file = "crc32c-2.7.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:2e83fedebcdeb80c19e76b7a0e5103528bb062521c40702bf34516a429e81df3"}, + {file = "crc32c-2.7.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30004a7383538ef93bda9b22f7b3805bc0aa5625ab2675690e1b676b19417d4b"}, + {file = "crc32c-2.7.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a01b0983aa87f517c12418f9898ecf2083bf86f4ea04122e053357c3edb0d73f"}, + {file = "crc32c-2.7.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb2b963c42128b38872e9ed63f04a73ce1ff89a1dfad7ea38add6fe6296497b8"}, + {file = "crc32c-2.7.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cdd5e576fee5d255c1e68a4dae4420f21e57e6f05900b38d5ae47c713fc3330d"}, + {file = "crc32c-2.7.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:79f0ff50863aeb441fbfa87e9db6542ddfe3e941189dece832b0af2e454dbab0"}, + {file = "crc32c-2.7.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cd27a1e400d77e9872fa1303e8f9d30bd050df35ee4858354ce0b59f8227d32"}, + {file = "crc32c-2.7.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:274739b3e1591bd4b7ec98764f2f79c6fbcc0f7d7676d5f17369832fe14ee4f0"}, + {file = "crc32c-2.7.1-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:050f52045b4a033a245e0ee4357e1a793de5af6496c82250ef13d8cb90a21e20"}, + {file = "crc32c-2.7.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ceb4ca126f75694bda020a307221563d3c522719c0acedcc81ffb985b4867c94"}, + {file = "crc32c-2.7.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:eabefe7a6fb5dfc6318fb35f4d98893baef17ebda9b311498e870526d32168e7"}, + {file = "crc32c-2.7.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:217edd9ba8c5f0c3ad60c82a11fa78f01162fa106fd7f5d17175dac6bf1eedf9"}, + {file = "crc32c-2.7.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15d640d9d4aa213aec6c837f602081a17d1522f8cd78b52334b62ee27b083410"}, + {file = "crc32c-2.7.1-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:519878822bf9bdead63c25a5e4bdc26d2eae9da6056f92b9b5f3023c08f1d016"}, + {file = "crc32c-2.7.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:2bf69cfa4c3ea9f060fe06db00b7e34f771c83f73dd2c3568c2c9019479e34c2"}, + {file = "crc32c-2.7.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e89d51c90f6730b67b12c97d49099ba18d0fdce18541fab94d2be95d1c939adb"}, + {file = "crc32c-2.7.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:488a0feba1bb005d0dd2f702c1da4849d083e88d82cd27b83ac2d2d93af80755"}, + {file = "crc32c-2.7.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:919262b7a12ef63f222ec19c0e092f39268802652e11669315257ae6249ec79f"}, + {file = "crc32c-2.7.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4181240f6080c38eec9dd1539cd23a304a12100d3f4ffe43234f32064fae5ef0"}, + {file = "crc32c-2.7.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fedde1e53507d0ede1980e8109442edd108c04ab100abcd5145c274820dacd4f"}, + {file = "crc32c-2.7.1.tar.gz", hash = "sha256:f91b144a21eef834d64178e01982bb9179c354b3e9e5f4c803b0e5096384968c"}, +] + [[package]] name = "crcmod" version = "1.7" @@ -2128,6 +2286,18 @@ pyarrow = ["pyarrow (>=1)"] requests = ["requests (>=2.4.0,!=2.32.2,<3.0.0)"] vectorstore-mmr = ["numpy (>=1)", "simsimd (>=3)"] +[[package]] +name = "enum34" +version = "1.1.10" +description = "Python 3.4 Enum backported to 3.3, 3.2, 3.1, 2.7, 2.6, 2.5, and 2.4" +optional = false +python-versions = "*" +files = [ + {file = "enum34-1.1.10-py2-none-any.whl", hash = "sha256:a98a201d6de3f2ab3db284e70a33b0f896fbf35f8086594e8c9e74b909058d53"}, + {file = "enum34-1.1.10-py3-none-any.whl", hash = "sha256:c3858660960c984d6ab0ebad691265180da2b43f07e061c0f8dca9ef3cffd328"}, + {file = "enum34-1.1.10.tar.gz", hash = "sha256:cce6a7477ed816bd2542d03d53db9f0db935dd013b70f336a95c73979289f248"}, +] + [[package]] name = "environs" version = "9.5.0" @@ -6464,22 +6634,27 @@ files = [ {file = "paddlepaddle-3.0.0b1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3032adf8f15c3adffd3d562bc6098759a4355682d752bfe188c8ddaaf2ca04ee"}, {file = "paddlepaddle-3.0.0b1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:9508663443ac46cd374f16dc559911c6341acad3985c698d61c9542c0589cbb8"}, {file = "paddlepaddle-3.0.0b1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:7b51946159209d9ab530c7f5bd084b9bded767f2cb59b485e8f6c67a0f8428d1"}, + {file = "paddlepaddle-3.0.0b1-cp310-cp310-win_amd64.whl", hash = "sha256:4a978662b6e82ff55a042dfa71fe6848b7542e77e39db4473f441d50a3bdf325"}, {file = "paddlepaddle-3.0.0b1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:05740afe4548ceed7cb5773a4d75a4d20071f46b0d1aaf5287628f956e9751d7"}, {file = "paddlepaddle-3.0.0b1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:03519081a07b7a1ef720fb5df3ed9012e966b156abe60d80d88b8f48e90c3ccc"}, {file = "paddlepaddle-3.0.0b1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:69bead732c5666409ec53c5cc770ad186641070aad261bd5cbefc81ed035e45c"}, {file = "paddlepaddle-3.0.0b1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:a59e6ab5771433d935d452e5ebd692bb4558f77c8fabbff2dce696597ed2808f"}, + {file = "paddlepaddle-3.0.0b1-cp311-cp311-win_amd64.whl", hash = "sha256:44d826f4b328e18d7fb354230fe3518114ba2a9df0be1c5b8bd350354a700471"}, {file = "paddlepaddle-3.0.0b1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c4f569cbf9b83c5e200e4f91ba8eb790a4d3a119b5fc9a7a2077be117c62cd6a"}, {file = "paddlepaddle-3.0.0b1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:202dd00132f55f170e9611f22bf570b7d95efbd33418b21c2fe4250b0f08d880"}, {file = "paddlepaddle-3.0.0b1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:3114609545f4d1eb0be0b6fc5d8d83eb391f578fc0aee23956dfc4cddc76b969"}, {file = "paddlepaddle-3.0.0b1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:b1649063906c3ae0e1ef66edf5c69257b00eec071038936e84fe24dbe2fef6be"}, + {file = "paddlepaddle-3.0.0b1-cp312-cp312-win_amd64.whl", hash = "sha256:17671e3f4ae62fe93beded6802e5349fd5e3f1f3bcf47346f291c3cca19c79f3"}, {file = "paddlepaddle-3.0.0b1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e1a4a50c5623057614f9831c099be97903d8707b4e6469da601953f5816d7db0"}, {file = "paddlepaddle-3.0.0b1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:be1338b7a8dd4695eed0be2655ddb6e77c9152828ec87c6a36fa3c54ed3ecdbd"}, {file = "paddlepaddle-3.0.0b1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:2e84f2a9e8798a5f461d794d802f28c03fc2edb734fe944f0f6cfba9c22dc507"}, {file = "paddlepaddle-3.0.0b1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8890e1fb7d964ff01cf3feab6695c2f3be86f1f83d953a3cbee24d154b4541bc"}, + {file = "paddlepaddle-3.0.0b1-cp38-cp38-win_amd64.whl", hash = "sha256:78e579f983476a960962dcabf1978bf181127cdfe9290a453aecfc38f5fc8aaa"}, {file = "paddlepaddle-3.0.0b1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0943e83ca4592e3d408e0db3be59982eb5e8be4414a5c926091a3c1a27d5969a"}, {file = "paddlepaddle-3.0.0b1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0fda1fa4d592c8849dcca4e3a45f24c73b1078102850d1d1215e2c97bab627f"}, {file = "paddlepaddle-3.0.0b1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:79c4ba0e4480420f73d65c4d226d3e19d48ad21b09409eb6f09e0e0f728bd5d0"}, {file = "paddlepaddle-3.0.0b1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c0ac4496f16db710758a4e0af536bdfe49d15c8fb799171ea03d1955b8fc66f1"}, + {file = "paddlepaddle-3.0.0b1-cp39-cp39-win_amd64.whl", hash = "sha256:32f08d3770511b0c1fcba49a42289ddeab1832c37c72ebc822c08f22f9298b43"}, ] [package.dependencies] @@ -9654,6 +9829,27 @@ mpmath = ">=1.1.0,<1.4" [package.extras] dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"] +[[package]] +name = "tablestore" +version = "6.1.0" +description = "Aliyun TableStore(OTS) SDK" +optional = false +python-versions = "*" +files = [ + {file = "tablestore-6.1.0.tar.gz", hash = "sha256:bfe6a3e0fe88a230729723c357f4a46b8869a06a4b936db20692ed587a721c1c"}, +] + +[package.dependencies] +certifi = ">=2016.2.28" +crc32c = ">=2.7.1" +enum34 = ">=1.1.6" +flatbuffers = ">=22.9.24" +future = ">=0.16.0" +numpy = ">=1.11.0" +protobuf = ">=3.20.0,<=5.27.4" +six = ">=1.11.0" +urllib3 = ">=1.14" + [[package]] name = "tabulate" version = "0.9.0" @@ -11372,4 +11568,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11.0,<3.12" -content-hash = "7d401db94c6bab1c772b28cfa8bc0bccbde8b576cee95816b7253dce83d23357" +content-hash = "5064cebb285d041f2ca430995510ab64105d60d0af4354aaae98f6934e2848b2" diff --git a/pyproject.toml b/pyproject.toml index 1544a6e9..a2235d7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,6 +106,7 @@ aspose-slides = "^24.10.0" ultralytics = "8.3.43" datasketch = "^1.6.5" primp = "0.9.1" +tablestore = "^6.1.0" [tool.poetry.scripts] pai_rag = "pai_rag.main:run" diff --git a/src/pai_rag/app/web/event_listeners.py b/src/pai_rag/app/web/event_listeners.py index 2aeff1fe..166e630d 100644 --- a/src/pai_rag/app/web/event_listeners.py +++ b/src/pai_rag/app/web/event_listeners.py @@ -161,6 +161,7 @@ def change_vectordb_conn(vectordb_type): milvus_visible = False opensearch_visible = False postgresql_visible = False + tablestore_visible = False if vectordb_type.lower() == "analyticdb": adb_visible = True elif vectordb_type.lower() == "hologres": @@ -175,6 +176,8 @@ def change_vectordb_conn(vectordb_type): opensearch_visible = True elif vectordb_type.lower() == "postgresql": postgresql_visible = True + elif vectordb_type.lower() == "tablestore": + tablestore_visible = True return [ gr.update(visible=adb_visible), @@ -184,6 +187,7 @@ def change_vectordb_conn(vectordb_type): gr.update(visible=milvus_visible), gr.update(visible=opensearch_visible), gr.update(visible=postgresql_visible), + gr.update(visible=tablestore_visible), ] diff --git a/src/pai_rag/app/web/index_utils.py b/src/pai_rag/app/web/index_utils.py index ab7638d8..72b3e251 100644 --- a/src/pai_rag/app/web/index_utils.py +++ b/src/pai_rag/app/web/index_utils.py @@ -19,6 +19,7 @@ MilvusVectorStoreConfig, OpenSearchVectorStoreConfig, PostgreSQLVectorStoreConfig, + TablestoreVectorStoreConfig, ) @@ -72,6 +73,11 @@ "postgresql_username", "postgresql_password", "postgresql_table_name", + "tablestore_endpoint", + "tablestore_instance_name", + "tablestore_access_key_id", + "tablestore_access_key_secret", + "tablestore_table_name", ] @@ -292,6 +298,26 @@ def index_to_components_settings( {"value": ""}, ] ) + if isinstance(vector_store_config, TablestoreVectorStoreConfig): + vector_component_settings.extend( + [ + {"value": vector_store_config.endpoint}, + {"value": vector_store_config.instance_name}, + {"value": vector_store_config.access_key_id}, + {"value": vector_store_config.access_key_secret}, + {"value": vector_store_config.table_name}, + ] + ) + else: + vector_component_settings.extend( + [ + {"value": ""}, + {"value": ""}, + {"value": ""}, + {"value": ""}, + {"value": ""}, + ] + ) component_settings = [ *index_component_settings, *embed_component_settings, @@ -359,6 +385,11 @@ def components_to_index( milvus_password, milvus_database, milvus_collection_name, + tablestore_endpoint, + tablestore_instance_name, + tablestore_access_key_id, + tablestore_access_key_secret, + tablestore_table_name, **kwargs, ) -> RagIndexEntry: if vector_index is None or vector_index.lower() == "new": @@ -443,6 +474,15 @@ def components_to_index( "username": postgresql_username, "password": postgresql_password, } + elif vectordb_type.lower() == "tablestore": + vector_store = { + "type": vectordb_type.lower(), + "endpoint": tablestore_endpoint, + "instance_name": tablestore_instance_name, + "access_key_id": tablestore_access_key_id, + "access_key_secret": tablestore_access_key_secret, + "table_name": tablestore_table_name, + } else: raise ValueError(f"Unknown vector db type: {vectordb_type}") diff --git a/src/pai_rag/app/web/tabs/vector_db_panel.py b/src/pai_rag/app/web/tabs/vector_db_panel.py index 78f7a105..83053f42 100644 --- a/src/pai_rag/app/web/tabs/vector_db_panel.py +++ b/src/pai_rag/app/web/tabs/vector_db_panel.py @@ -17,6 +17,7 @@ def create_vector_db_panel() -> Dict[str, Any]: "faiss", "opensearch", "postgresql", + "tablestore", ], label="Which VectorStore do you want to use?", elem_id="vectordb_type", @@ -232,6 +233,37 @@ def create_vector_db_panel() -> Dict[str, Any]: interactive=True, ) + with gr.Column(visible=(vectordb_type == "tablestore")) as tablestore_col: + with gr.Row(): + tablestore_endpoint = gr.Textbox( + label="tablestore_endpoint", + elem_id="tablestore_endpoint", + interactive=True, + ) + tablestore_instance_name = gr.Textbox( + label="tablestore_instance_name", + elem_id="tablestore_instance_name", + interactive=True, + ) + with gr.Row(): + tablestore_access_key_id = gr.Textbox( + label="tablestore_access_key_id", + elem_id="tablestore_access_key_id", + interactive=True, + ) + tablestore_access_key_secret = gr.Textbox( + label="tablestore_access_key_secret", + type="password", + elem_id="tablestore_access_key_secret", + interactive=True, + ) + with gr.Row(): + tablestore_table_name = gr.Textbox( + label="tablestore_table_name", + elem_id="tablestore_table_name", + interactive=True, + ) + vectordb_type.change( fn=ev_listeners.change_vectordb_conn, inputs=vectordb_type, @@ -243,6 +275,7 @@ def create_vector_db_panel() -> Dict[str, Any]: milvus_col, opensearch_col, postgresql_col, + tablestore_col, ], ) db_related_elements = [ @@ -291,6 +324,12 @@ def create_vector_db_panel() -> Dict[str, Any]: adb_account, adb_account_password, adb_namespace, + # tablestore + tablestore_endpoint, + tablestore_instance_name, + tablestore_access_key_id, + tablestore_access_key_secret, + tablestore_table_name, ] components.extend(db_related_elements) return db_related_elements, components_to_dict(components) diff --git a/src/pai_rag/integrations/index/pai/utils/vector_store_utils.py b/src/pai_rag/integrations/index/pai/utils/vector_store_utils.py index 5a647836..5ffc02b5 100644 --- a/src/pai_rag/integrations/index/pai/utils/vector_store_utils.py +++ b/src/pai_rag/integrations/index/pai/utils/vector_store_utils.py @@ -2,12 +2,16 @@ import faiss import os import json +import tablestore from llama_index.core.vector_stores.simple import DEFAULT_VECTOR_STORE, NAMESPACE_SEP from llama_index.core.vector_stores.types import DEFAULT_PERSIST_FNAME from elasticsearch.helpers.vectorstore import AsyncDenseVectorStrategy from pai_rag.integrations.index.pai.utils.sparse_embed_function import ( BGEM3SparseEmbeddingFunction, ) +from pai_rag.integrations.vector_stores.tablestore.tablestore import ( + TablestoreVectorStore, +) from pai_rag.integrations.vector_stores.hologres.hologres import HologresVectorStore from pai_rag.integrations.vector_stores.elasticsearch.my_elasticsearch import ( MyElasticsearchStore, @@ -27,6 +31,7 @@ ElasticSearchVectorStoreConfig, OpenSearchVectorStoreConfig, HologresVectorStoreConfig, + TablestoreVectorStoreConfig, ) @@ -57,6 +62,8 @@ def create_vector_store( create_vector_store_func = create_postgresql elif isinstance(vectordb_config, OpenSearchVectorStoreConfig): create_vector_store_func = create_opensearch + elif isinstance(vectordb_config, TablestoreVectorStoreConfig): + create_vector_store_func = create_tablestore else: raise ValueError(f"Unknown vector store config {vectordb_config}.") @@ -236,6 +243,86 @@ def create_opensearch( return opensearch_store +def create_tablestore( + tablestore_config: TablestoreVectorStoreConfig, + embed_dims: int, + is_image_store: bool = False, +): + table_name = tablestore_config.table_name + if is_image_store: + table_name = f"{table_name}__image" + + tablestore_store = TablestoreVectorStore( + endpoint=tablestore_config.endpoint, + instance_name=tablestore_config.instance_name, + access_key_id=tablestore_config.access_key_id, + access_key_secret=tablestore_config.access_key_secret, + table_name=table_name, + index_name="pai_rag_vector_store_ots_index_v1", + vector_dimension=embed_dims, + # metadata mapping is used to filter non-vector fields. + metadata_mappings=[ + tablestore.FieldSchema( + "file_name", + tablestore.FieldType.KEYWORD, + index=True, + enable_sort_and_agg=True, + ), + tablestore.FieldSchema( + "file_type", + tablestore.FieldType.KEYWORD, + index=True, + enable_sort_and_agg=True, + ), + tablestore.FieldSchema( + "file_size", + tablestore.FieldType.LONG, + index=True, + enable_sort_and_agg=True, + ), + tablestore.FieldSchema( + "file_path", + tablestore.FieldType.TEXT, + index=True, + enable_sort_and_agg=False, + ), + tablestore.FieldSchema( + "image_url", + tablestore.FieldType.TEXT, + index=True, + enable_sort_and_agg=False, + ), + tablestore.FieldSchema( + "creation_date", + tablestore.FieldType.DATE, + index=True, + enable_sort_and_agg=True, + date_formats=[ + "yyyy-MM-dd", + "yyyy-MM-dd HH:mm", + "yyyy-MM-dd HH:mm:ss", + "yyyy-MM-dd HH:mm:ss.SSS", + ], + ), + tablestore.FieldSchema( + "last_modified_date", + tablestore.FieldType.DATE, + index=True, + enable_sort_and_agg=True, + date_formats=[ + "yyyy-MM-dd", + "yyyy-MM-dd HH:mm", + "yyyy-MM-dd HH:mm:ss", + "yyyy-MM-dd HH:mm:ss.SSS", + ], + ), + ], + ) + tablestore_store.create_table_if_not_exist() + tablestore_store.create_search_index_if_not_exist() + return tablestore_store + + def create_postgresql( pg_config: PostgreSQLVectorStoreConfig, embed_dims: int, diff --git a/src/pai_rag/integrations/index/pai/vector_store_config.py b/src/pai_rag/integrations/index/pai/vector_store_config.py index 3bc4091e..0219817f 100644 --- a/src/pai_rag/integrations/index/pai/vector_store_config.py +++ b/src/pai_rag/integrations/index/pai/vector_store_config.py @@ -13,6 +13,7 @@ class SupportedVectorStoreType(str, Enum): opensearch = "opensearch" milvus = "milvus" hologres = "hologres" + tablestore = "tablestore" class VectorIndexRetrievalType(str, Enum): @@ -25,6 +26,7 @@ class VectorIndexRetrievalType(str, Enum): SupportedVectorStoreType.elasticsearch, SupportedVectorStoreType.postgresql, SupportedVectorStoreType.milvus, + SupportedVectorStoreType.tablestore, ] @@ -99,6 +101,17 @@ class OpenSearchVectorStoreConfig(BaseVectorStoreConfig): table_name: str +class TablestoreVectorStoreConfig(BaseVectorStoreConfig): + type: Literal[ + SupportedVectorStoreType.tablestore + ] = SupportedVectorStoreType.tablestore + endpoint: str + instance_name: str + access_key_id: str + access_key_secret: str + table_name: str + + class PostgreSQLVectorStoreConfig(BaseVectorStoreConfig): type: Literal[ SupportedVectorStoreType.postgresql diff --git a/src/pai_rag/integrations/vector_stores/tablestore/tablestore.py b/src/pai_rag/integrations/vector_stores/tablestore/tablestore.py new file mode 100644 index 00000000..e058a822 --- /dev/null +++ b/src/pai_rag/integrations/vector_stores/tablestore/tablestore.py @@ -0,0 +1,721 @@ +"""Tablestore vector store.""" + +import json +import traceback +from logging import getLogger +from typing import Any, List, Optional, Dict + +import tablestore +from llama_index.core.bridge.pydantic import PrivateAttr +from llama_index.core.schema import BaseNode, TextNode +from llama_index.core.vector_stores.types import ( + BasePydanticVectorStore, + MetadataFilters, + VectorStoreQuery, + VectorStoreQueryResult, + FilterCondition, + MetadataFilter, + FilterOperator, +) + + +class TablestoreVectorStore(BasePydanticVectorStore): + """`Tablestore` vector store. + + To use, you should have the ``tablestore`` python package installed. + + Examples: + ```python + import tablestore + import os + + store = TablestoreVectorStore( + endpoint=os.getenv("end_point"), + instance_name=os.getenv("instance_name"), + access_key_id=os.getenv("access_key_id"), + access_key_secret=os.getenv("access_key_secret"), + vector_dimension=512, + vector_metric_type=tablestore.VectorMetricType.VM_COSINE, + # metadata mapping is used to filter non-vector fields. + metadata_mappings=[ + tablestore.FieldSchema( + "type", + tablestore.FieldType.KEYWORD, + index=True, + enable_sort_and_agg=True, + ), + tablestore.FieldSchema( + "time", tablestore.FieldType.LONG, index=True, enable_sort_and_agg=True + ), + ], + ) + ``` + """ + + is_embedding_query: bool = True + stores_text: bool = True + _vector_dimension: int = PrivateAttr(default=512) + _logger: Any = PrivateAttr(default=None) + _tablestore_client: tablestore.OTSClient = PrivateAttr(default=None) + _table_name: str = PrivateAttr(default="llama_index_vector_store_ots_v1") + _index_name: str = PrivateAttr(default="llama_index_vector_store_ots_index_v1") + _text_field: str = PrivateAttr(default="content") + _vector_field: str = PrivateAttr(default="embedding") + _ref_doc_id_field: str = PrivateAttr(default="ref_doc_id") + _metadata_mappings: List[tablestore.FieldSchema] = PrivateAttr(default=None) + + def __init__( + self, + tablestore_client: Optional[tablestore.OTSClient] = None, + endpoint: Optional[str] = None, + instance_name: Optional[str] = None, + access_key_id: Optional[str] = None, + access_key_secret: Optional[str] = None, + table_name: str = "llama_index_vector_store_ots_v1", + index_name: str = "llama_index_vector_store_ots_index_v1", + text_field: str = "content", + vector_field: str = "embedding", + ref_doc_id_field: str = "ref_doc_id", + vector_dimension: int = 512, + vector_metric_type: tablestore.VectorMetricType = tablestore.VectorMetricType.VM_COSINE, + metadata_mappings: Optional[List[tablestore.FieldSchema]] = None, + ) -> None: + super().__init__() + self._logger = getLogger(__name__) + if not tablestore_client: + self._tablestore_client = tablestore.OTSClient( + endpoint, + access_key_id, + access_key_secret, + instance_name, + retry_policy=tablestore.WriteRetryPolicy(), + ) + else: + self._tablestore_client = tablestore_client + self._vector_dimension = vector_dimension + self._table_name = table_name + self._index_name = index_name + self._text_field = text_field + self._vector_field = vector_field + self._ref_doc_id_field = ref_doc_id_field + + self._metadata_mappings = [ + tablestore.FieldSchema( + text_field, + tablestore.FieldType.TEXT, + index=True, + enable_sort_and_agg=False, + store=False, + analyzer=tablestore.AnalyzerType.MAXWORD, + ), + tablestore.FieldSchema( + ref_doc_id_field, + tablestore.FieldType.KEYWORD, + index=True, + enable_sort_and_agg=True, + store=False, + ), + tablestore.FieldSchema( + vector_field, + tablestore.FieldType.VECTOR, + vector_options=tablestore.VectorOptions( + data_type=tablestore.VectorDataType.VD_FLOAT_32, + dimension=vector_dimension, + metric_type=vector_metric_type, + ), + ), + ] + if metadata_mappings: + for mapping in metadata_mappings: + if ( + mapping.field_name == text_field + or mapping.field_name == vector_field + or mapping.field_name == ref_doc_id_field + ): + continue + self._metadata_mappings.append(mapping) + + def create_table_if_not_exist(self) -> None: + """Create table if not exist.""" + table_list = self._tablestore_client.list_table() + if self._table_name in table_list: + self._logger.info( + "Tablestore system table[%s] already exists", self._table_name + ) + return + self._logger.info( + "Tablestore system table[%s] does not exist, try to create the table.", + self._table_name, + ) + + schema_of_primary_key = [("id", "STRING")] + table_meta = tablestore.TableMeta(self._table_name, schema_of_primary_key) + table_options = tablestore.TableOptions() + reserved_throughput = tablestore.ReservedThroughput( + tablestore.CapacityUnit(0, 0) + ) + try: + self._tablestore_client.create_table( + table_meta, table_options, reserved_throughput + ) + self._logger.info( + "Tablestore create table[%s] successfully.", self._table_name + ) + except tablestore.OTSClientError as e: + traceback.print_exc() + self._logger.exception( + "Tablestore create system table[%s] failed with client error, http_status:%d, error_message:%s", + self._table_name, + e.get_http_status(), + e.get_error_message(), + ) + except tablestore.OTSServiceError as e: + traceback.print_exc() + self._logger.exception( + "Tablestore create system table[%s] failed with client error, http_status:%d, error_code:%s, error_message:%s, request_id:%s", + self._table_name, + e.get_http_status(), + e.get_error_code(), + e.get_error_message(), + e.get_request_id(), + ) + + def create_search_index_if_not_exist(self) -> None: + """Create search index if not exist.""" + search_index_list = self._tablestore_client.list_search_index( + table_name=self._table_name + ) + if self._index_name in [t[1] for t in search_index_list]: + self._logger.info( + "Tablestore system index[%s] already exists", self._index_name + ) + return + index_meta = tablestore.SearchIndexMeta(self._metadata_mappings) + self._tablestore_client.create_search_index( + self._table_name, self._index_name, index_meta + ) + self._logger.info( + "Tablestore create system index[%s] successfully.", self._index_name + ) + + def delete_table_if_exists(self): + """Delete table if exists.""" + search_index_list = self._tablestore_client.list_search_index( + table_name=self._table_name + ) + for resp_tuple in search_index_list: + self._tablestore_client.delete_search_index(resp_tuple[0], resp_tuple[1]) + self._logger.info( + "Tablestore delete index[%s] successfully.", self._index_name + ) + self._tablestore_client.delete_table(self._table_name) + self._logger.info( + "Tablestore delete system table[%s] successfully.", self._index_name + ) + + def delete_search_index(self, table_name, index_name) -> None: + self._tablestore_client.delete_search_index(table_name, index_name) + self._logger.info("Tablestore delete index[%s] successfully.", self._index_name) + + def _write_row( + self, + row_id: str, + content: str, + embedding_vector: List[float], + metadata: Dict[str, Any], + ) -> None: + primary_key = [("id", row_id)] + attribute_columns = [ + (self._text_field, content), + (self._vector_field, json.dumps(embedding_vector)), + ] + for k, v in metadata.items(): + item = (k, v) + attribute_columns.append(item) + row = tablestore.Row(primary_key, attribute_columns) + + try: + self._tablestore_client.put_row(self._table_name, row) + self._logger.debug( + "Tablestore put row successfully. id:%s, content:%s, meta_data:%s", + row_id, + content, + metadata, + ) + except tablestore.OTSClientError as e: + self._logger.exception( + "Tablestore put row failed with client error:%s, id:%s, content:%s, meta_data:%s", + e, + row_id, + content, + metadata, + ) + except tablestore.OTSServiceError as e: + self._logger.exception( + "Tablestore put row failed with client error:%s, id:%s, content:%s, meta_data:%s, http_status:%d, error_code:%s, error_message:%s, request_id:%s", + e, + row_id, + content, + metadata, + e.get_http_status(), + e.get_error_code(), + e.get_error_message(), + e.get_request_id(), + ) + + def _delete_row(self, row_id: str) -> None: + primary_key = [("id", row_id)] + try: + self._tablestore_client.delete_row(self._table_name, primary_key, None) + self._logger.info("Tablestore delete row successfully. id:%s", row_id) + except tablestore.OTSClientError as e: + self._logger.exception( + "Tablestore delete row failed with client error:%s, id:%s", e, row_id + ) + except tablestore.OTSServiceError as e: + self._logger.exception( + "Tablestore delete row failed with client error:%s, id:%s, http_status:%d, error_code:%s, error_message:%s, request_id:%s", + e, + row_id, + e.get_http_status(), + e.get_error_code(), + e.get_error_message(), + e.get_request_id(), + ) + + def _delete_all(self) -> None: + inclusive_start_primary_key = [("id", tablestore.INF_MIN)] + exclusive_end_primary_key = [("id", tablestore.INF_MAX)] + total = 0 + try: + while True: + ( + consumed, + next_start_primary_key, + row_list, + next_token, + ) = self._tablestore_client.get_range( + self._table_name, + tablestore.Direction.FORWARD, + inclusive_start_primary_key, + exclusive_end_primary_key, + [], + 5000, + max_version=1, + ) + for row in row_list: + self._tablestore_client.delete_row( + self._table_name, row.primary_key, None + ) + total += 1 + if next_start_primary_key is not None: + inclusive_start_primary_key = next_start_primary_key + else: + break + except tablestore.OTSClientError as e: + self._logger.exception( + "Tablestore delete row failed with client error:%s", e + ) + except tablestore.OTSServiceError as e: + self._logger.exception( + "Tablestore delete row failed with client error:%s, http_status:%d, error_code:%s, error_message:%s, request_id:%s", + e, + e.get_http_status(), + e.get_error_code(), + e.get_error_message(), + e.get_request_id(), + ) + self._logger.info("delete all rows count:%d", total) + + def _search( + self, query: VectorStoreQuery, knn_top_k: int + ) -> VectorStoreQueryResult: + filter_query = self._parse_filters(query.filters) + ots_query = tablestore.KnnVectorQuery( + field_name=self._vector_field, + top_k=knn_top_k, + float32_query_vector=query.query_embedding, + filter=filter_query, + ) + sort = tablestore.Sort( + sorters=[tablestore.ScoreSort(sort_order=tablestore.SortOrder.DESC)] + ) + search_query = tablestore.SearchQuery( + ots_query, limit=query.similarity_top_k, get_total_count=False, sort=sort + ) + try: + search_response = self._tablestore_client.search( + table_name=self._table_name, + index_name=self._index_name, + search_query=search_query, + columns_to_get=tablestore.ColumnsToGet( + return_type=tablestore.ColumnReturnType.ALL + ), + ) + self._logger.info( + "Tablestore search successfully. request_id:%s", + search_response.request_id, + ) + return self._to_query_result(search_response) + except tablestore.OTSClientError as e: + self._logger.exception("Tablestore search failed with client error:%s", e) + except tablestore.OTSServiceError as e: + self._logger.exception( + "Tablestore search failed with client error:%s, http_status:%d, error_code:%s, error_message:%s, request_id:%s", + e, + e.get_http_status(), + e.get_error_code(), + e.get_error_message(), + e.get_request_id(), + ) + + def _filter( + self, + filters: Optional[MetadataFilters] = None, + return_type: Optional[ + tablestore.ColumnReturnType + ] = tablestore.ColumnReturnType.ALL, + limit: Optional[int] = 100, + ) -> List: + if filters is None: + return [] + filter_query = self._parse_filters(filters) + search_query = tablestore.SearchQuery( + filter_query, limit=1, get_total_count=False + ) + all_rows = [] + try: + # first round + search_response = self._tablestore_client.search( + table_name=self._table_name, + index_name=self._index_name, + search_query=search_query, + columns_to_get=tablestore.ColumnsToGet(return_type=return_type), + ) + all_rows.extend(search_response.rows) + # loop + while search_response.next_token: + search_query.next_token = search_response.next_token + search_response = self._tablestore_client.search( + table_name=self._table_name, + index_name=self._index_name, + search_query=search_query, + columns_to_get=tablestore.ColumnsToGet(return_type=return_type), + ) + all_rows.extend(search_response.rows) + return all_rows + except tablestore.OTSClientError as e: + self._logger.exception("Tablestore search failed with client error:%s", e) + except tablestore.OTSServiceError as e: + self._logger.exception( + "Tablestore search failed with client error:%s, http_status:%d, error_code:%s, error_message:%s, request_id:%s", + e, + e.get_http_status(), + e.get_error_code(), + e.get_error_message(), + e.get_request_id(), + ) + + def _to_get_nodes_result(self, rows) -> List[TextNode]: + nodes = [] + for row in rows: + node_id = row[0][0][1] + meta_data = {} + text = None + embedding = None + for col in row[1]: + key = col[0] + val = col[1] + if key == self._text_field: + text = val + continue + if key == self._vector_field: + embedding = json.loads(val) + continue + meta_data[key] = val + node = TextNode( + id_=node_id, + text=text, + metadata=meta_data, + embedding=embedding, + ) + nodes.append(node) + return nodes + + def _get_row(self, row_id: str) -> Optional[TextNode]: + primary_key = [("id", row_id)] + try: + _, row, _ = self._tablestore_client.get_row( + self._table_name, primary_key, None, None, 1 + ) + self._logger.debug("Tablestore get row successfully. id:%s", row_id) + if row is None: + return None + node_id = row.primary_key[0][1] + meta_data = {} + text = None + embedding = None + for col in row.attribute_columns: + key = col[0] + val = col[1] + if key == self._text_field: + text = val + continue + if key == self._vector_field: + embedding = json.loads(val) + continue + meta_data[key] = val + return TextNode( + id_=node_id, + text=text, + metadata=meta_data, + embedding=embedding, + ) + except tablestore.OTSClientError as e: + self._logger.exception( + "Tablestore get row failed with client error:%s, id:%s", e, row_id + ) + except tablestore.OTSServiceError as e: + self._logger.exception( + "Tablestore get row failed with client error:%s, " + "id:%s, http_status:%d, error_code:%s, error_message:%s, request_id:%s", + e, + row_id, + e.get_http_status(), + e.get_error_code(), + e.get_error_message(), + e.get_request_id(), + ) + + def _to_query_result(self, search_response) -> VectorStoreQueryResult: + nodes = [] + ids = [] + similarities = [] + for hit in search_response.search_hits: + row = hit.row + score = hit.score + node_id = row[0][0][1] + meta_data = {} + text = None + embedding = None + for col in row[1]: + key = col[0] + val = col[1] + if key == self._text_field: + text = val + continue + if key == self._vector_field: + embedding = json.loads(val) + continue + meta_data[key] = val + node = TextNode( + id_=node_id, + text=text, + metadata=meta_data, + embedding=embedding, + ) + ids.append(node_id) + nodes.append(node) + similarities.append(score) + return VectorStoreQueryResult(nodes=nodes, ids=ids, similarities=similarities) + + def _parse_filters_recursively( + self, filters: MetadataFilters + ) -> tablestore.BoolQuery: + """Parse (possibly nested) MetadataFilters to equivalent tablestore search expression.""" + bool_query = tablestore.BoolQuery( + must_queries=[], + must_not_queries=[], + filter_queries=[], + should_queries=[], + minimum_should_match=None, + ) + if filters.condition is FilterCondition.AND: + bool_clause = bool_query.must_queries + elif filters.condition is FilterCondition.OR: + bool_clause = bool_query.should_queries + else: + raise ValueError(f"Unsupported filter condition: {filters.condition}") + + for filter_item in filters.filters: + if isinstance(filter_item, MetadataFilter): + bool_clause.append(self._parse_filter(filter_item)) + elif isinstance(filter_item, MetadataFilters): + bool_clause.append(self._parse_filters_recursively(filter_item)) + else: + raise ValueError(f"Unsupported filter type: {type(filter_item)}") + + return bool_query + + def _parse_filters(self, filters: Optional[MetadataFilters]) -> tablestore.Query: + """Parse MetadataFilters to equivalent OpenSearch expression.""" + if filters is None: + return tablestore.MatchAllQuery() + return self._parse_filters_recursively(filters=filters) + + @staticmethod + def _parse_filter(filter_item: MetadataFilter) -> tablestore.Query: + key = filter_item.key + val = filter_item.value + op = filter_item.operator + + if op == FilterOperator.EQ: + return tablestore.TermQuery(field_name=key, column_value=val) + elif op == FilterOperator.GT: + return tablestore.RangeQuery( + field_name=key, range_from=val, include_lower=False + ) + elif op == FilterOperator.GTE: + return tablestore.RangeQuery( + field_name=key, range_from=val, include_lower=True + ) + elif op == FilterOperator.LT: + return tablestore.RangeQuery( + field_name=key, range_to=val, include_upper=False + ) + elif op == FilterOperator.LTE: + return tablestore.RangeQuery( + field_name=key, range_to=val, include_upper=True + ) + elif op == FilterOperator.NE: + bq = tablestore.BoolQuery( + must_queries=[], + must_not_queries=[], + filter_queries=[], + should_queries=[], + minimum_should_match=None, + ) + bq.must_not_queries.append( + tablestore.TermQuery(field_name=key, column_value=val) + ) + return bq + elif op in [FilterOperator.IN, FilterOperator.ANY]: + return tablestore.TermsQuery(field_name=key, column_values=val) + elif op == FilterOperator.NIN: + bq = tablestore.BoolQuery( + must_queries=[], + must_not_queries=[], + filter_queries=[], + should_queries=[], + minimum_should_match=None, + ) + bq.must_not_queries.append( + tablestore.TermsQuery(field_name=key, column_values=val) + ) + return bq + elif op == FilterOperator.ALL: + bq = tablestore.BoolQuery( + must_queries=[], + must_not_queries=[], + filter_queries=[], + should_queries=[], + minimum_should_match=None, + ) + for val_item in val: + bq.must_queries.append( + tablestore.TermQuery(field_name=key, column_value=val_item) + ) + return bq + elif op == FilterOperator.TEXT_MATCH: + return tablestore.MatchQuery(field_name=key, text=val) + elif op == FilterOperator.CONTAINS: + return tablestore.WildcardQuery(field_name=key, value=f"*{val}*") + else: + raise ValueError(f"Unsupported filter operator: {filter_item.operator}") + + @property + def client(self) -> Any: + """Get client.""" + return self._tablestore_client + + def add(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]: + """Add nodes to vector store.""" + if len(nodes) == 0: + return [] + ids = [] + for node in nodes: + if len(node.get_embedding()) != self._vector_dimension: + raise RuntimeError( + "node embedding size:%d is not the same as vector store dim:%d" + % (len(node.get_embedding()), self._vector_dimension) + ) + self._write_row( + row_id=node.node_id, + content=node.text, + embedding_vector=node.get_embedding(), + metadata=node.metadata, + ) + ids.append(node.node_id) + return ids + + def delete_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + **delete_kwargs: Any, + ) -> None: + """Delete nodes from vector store.""" + if node_ids is None and filters is None: + raise RuntimeError("node_ids and filters cannot be None at the same time.") + if node_ids is not None and filters is not None: + raise RuntimeError("node_ids and filters cannot be set at the same time.") + if filters is not None: + rows = self._filter( + filters=filters, return_type=tablestore.ColumnReturnType.NONE + ) + for row in rows: + self._delete_row(row[0][0][1]) + if node_ids is not None: + for node_id in node_ids: + self._delete_row(node_id) + + def get_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + ) -> List[BaseNode]: + """Get nodes from vector store.""" + if node_ids is None and filters is None: + raise RuntimeError("node_ids and filters cannot be None at the same time.") + if node_ids is not None and filters is not None: + raise RuntimeError("node_ids and filters cannot be set at the same time.") + if filters is not None: + rows = self._filter( + filters=filters, return_type=tablestore.ColumnReturnType.ALL + ) + return self._to_get_nodes_result(rows) + if node_ids is not None: + nodes = [] + for node_id in node_ids: + nodes.append(self._get_row(node_id)) + return nodes + return [] + + def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None: + """Delete nodes using with ref_doc_id.""" + rows = self._filter( + filters=MetadataFilters( + filters=[ + MetadataFilter( + key=self._ref_doc_id_field, + value=ref_doc_id, + operator=FilterOperator.EQ, + ), + ], + condition=FilterCondition.AND, + ), + return_type=tablestore.ColumnReturnType.NONE, + ) + for row in rows: + self._delete_row(row[0][0][1]) + + def clear(self) -> None: + """Clear all nodes from configured vector store.""" + self._delete_all() + + def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: + """Query vector store.""" + knn_top_k = query.similarity_top_k + if "knn_top_k" in kwargs: + knn_top_k = kwargs["knn_top_k"] + return self._search(query=query, knn_top_k=knn_top_k)