diff --git a/docs/_toc.yml b/docs/_toc.yml index 38309dbcfc..361cb5a606 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -91,6 +91,7 @@ parts: - file: source/reference/vector_databases/pinecone - file: source/reference/vector_databases/milvus - file: source/reference/vector_databases/weaviate + - file: source/reference/vector_databases/marqo - file: source/reference/ai/index title: AI Engines diff --git a/docs/source/reference/vector_databases/marqo.rst b/docs/source/reference/vector_databases/marqo.rst new file mode 100644 index 0000000000..3c9a1ff176 --- /dev/null +++ b/docs/source/reference/vector_databases/marqo.rst @@ -0,0 +1,27 @@ +Marqo +========== + +Marqo is more than a vector database, it's an end-to-end vector search engine for both text and images. +Vector generation, storage and retrieval are handled out of the box through a single API. +Connection is based on the `marqo `_ client library. + +Dependency +---------- + +* marqo==2.1.0 + + +Setup +----- + +To use marqo, you need a URL and API key to your instance. +Here are `instructions to setup local instance `_. +Cloud offering can also be used. + + +Create Index +----------------- + +.. code-block:: sql + + CREATE INDEX index_name ON table_name (data) USING MARQO; \ No newline at end of file diff --git a/evadb/catalog/catalog_type.py b/evadb/catalog/catalog_type.py index 5da568779f..6282bb63d7 100644 --- a/evadb/catalog/catalog_type.py +++ b/evadb/catalog/catalog_type.py @@ -119,6 +119,7 @@ class VectorStoreType(EvaDBEnum): CHROMADB # noqa: F821 WEAVIATE # noqa: F821 MILVUS # noqa: F821 + MARQO # noqa: F821 class VideoColumnName(EvaDBEnum): diff --git a/evadb/interfaces/relational/db.py b/evadb/interfaces/relational/db.py index 714593a8a8..d4835c4254 100644 --- a/evadb/interfaces/relational/db.py +++ b/evadb/interfaces/relational/db.py @@ -269,7 +269,7 @@ def create_vector_index( table_name (str): Name of the table. expr (str): Expression used to build the vector index. - using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `WEAVIATE` or `MILVUS`. + using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `WEAVIATE` or `MILVUS` or `MARQO`. Returns: EvaDBCursor: The EvaDBCursor object. diff --git a/evadb/parser/lark_visitor/_create_statements.py b/evadb/parser/lark_visitor/_create_statements.py index 175f0087e9..cf15a76e81 100644 --- a/evadb/parser/lark_visitor/_create_statements.py +++ b/evadb/parser/lark_visitor/_create_statements.py @@ -304,6 +304,8 @@ def vector_store_type(self, tree): vector_store_type = VectorStoreType.WEAVIATE elif str.upper(token) == "MILVUS": vector_store_type = VectorStoreType.MILVUS + elif str.upper(token) == "MARQO": + vector_store_type = VectorStoreType.MARQO return vector_store_type diff --git a/evadb/third_party/vector_stores/marqo.py b/evadb/third_party/vector_stores/marqo.py new file mode 100644 index 0000000000..204d801a13 --- /dev/null +++ b/evadb/third_party/vector_stores/marqo.py @@ -0,0 +1,111 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List + +from evadb.third_party.vector_stores.types import ( + FeaturePayload, + VectorIndexQuery, + VectorIndexQueryResult, + VectorStore, +) +from evadb.utils.generic_utils import try_to_import_marqo_client + +_marqo_client_instance = None + +required_params = ["url", "index_name"] + + +def get_marqo_client(url: str, api_key: str = None): + global _marqo_client_instance + if _marqo_client_instance is None: + try_to_import_marqo_client() + import marqo as mq + + _marqo_client_instance = mq.Client(url=url, api_key=api_key) + return _marqo_client_instance + + +class MarqoVectorStore(VectorStore): + def __init__( + self, index_name: str, url: str = "http://0.0.0.0:8882", api_key=None + ) -> None: + self._client = get_marqo_client(url=url) + self._index_name = index_name + + def create(self, vector_dim: int): + + # Delete index if exists already + if self._index_name in [ + i.index_name for i in self._client.get_indexes()["results"] + ]: + self.delete() + + # create fresh + # Refer here for details - https://docs.marqo.ai/2.0.0/API-Reference/Indexes/create_index/ + self._client.create_index( + index_name=self._index_name, + settings_dict={ + "index_defaults": { + "model": "no_model", + "model_properties": {"dimensions": vector_dim}, + "normalize_embeddings": True, + "ann_parameters": {"space_type": "cosinesimil"}, + } + }, + ) + + def add(self, payload: List[FeaturePayload]): + + ids = [int(row.id) for row in payload] + embeddings = [row.embedding for row in payload] + + data = [] + for _id, _emb in zip(ids, embeddings): + _id = str(_id) + data.append({"_id": _id, "evadb_data": {"vector": _emb}}) + + # For reference and more information + # check - https://docs.marqo.ai/1.4.0/Guides/Advanced-Usage/document_fields/#custom-vector-object + self._client.index(index_name=self._index_name).add_documents( + documents=data, + mappings={"evadb_data": {"type": "custom_vector"}}, + tensor_fields=["evadb_data"], + auto_refresh=True, + client_batch_size=64, + ) + + def delete(self) -> None: + self._client.delete_index(index_name=self._index_name) + + def query( + self, + query: VectorIndexQuery, + ) -> VectorIndexQueryResult: + response = self._client.index(self._index_name).search( + context={ + "tensor": [{"vector": list(query.embedding), "weight": 1}], + }, + limit=query.top_k, + ) + + similarities, ids = [], [] + + for result in response["hits"]: + ids.append(result["_id"]) + + # Because it is similarity score + similarities.append(1 - result["_score"]) + + return VectorIndexQueryResult(similarities=similarities, ids=ids) diff --git a/evadb/third_party/vector_stores/utils.py b/evadb/third_party/vector_stores/utils.py index 9c12fc6fbd..8478126caf 100644 --- a/evadb/third_party/vector_stores/utils.py +++ b/evadb/third_party/vector_stores/utils.py @@ -15,6 +15,7 @@ from evadb.catalog.catalog_type import VectorStoreType from evadb.third_party.vector_stores.chromadb import ChromaDBVectorStore from evadb.third_party.vector_stores.faiss import FaissVectorStore +from evadb.third_party.vector_stores.marqo import MarqoVectorStore from evadb.third_party.vector_stores.milvus import MilvusVectorStore from evadb.third_party.vector_stores.pinecone import PineconeVectorStore from evadb.third_party.vector_stores.qdrant import QdrantVectorStore @@ -67,5 +68,11 @@ def init_vector_store( validate_kwargs(kwargs, allowed_params, required_params) return MilvusVectorStore(index_name, **kwargs) + elif vector_store_type == VectorStoreType.MARQO: + from evadb.third_party.vector_stores.marqo import required_params + + validate_kwargs(kwargs, required_params, required_params) + return MarqoVectorStore(index_name, **kwargs) + else: raise Exception(f"Vector store {vector_store_type} not supported") diff --git a/evadb/utils/generic_utils.py b/evadb/utils/generic_utils.py index 426719f87c..f021129bac 100644 --- a/evadb/utils/generic_utils.py +++ b/evadb/utils/generic_utils.py @@ -593,6 +593,16 @@ def try_to_import_milvus_client(): ) +def try_to_import_marqo_client(): + try: + import marqo # noqa: F401 + except ImportError: + raise ValueError( + """Could not import marqo python package. + Please install it with `pip install marqo`.""" + ) + + def is_qdrant_available() -> bool: try: try_to_import_qdrant_client() @@ -633,6 +643,14 @@ def is_milvus_available() -> bool: return False +def is_marqo_available() -> bool: + try: + try_to_import_marqo_client() + return True + except ValueError: + return False + + ############################## ## UTILS ############################## diff --git a/setup.py b/setup.py index e3d211ece8..e0dd5ee700 100644 --- a/setup.py +++ b/setup.py @@ -116,6 +116,8 @@ def read(path, encoding="utf-8"): milvus_libs = ["pymilvus>=2.3.0"] +marqo_libs = ["marqo"] + postgres_libs = [ "psycopg2", @@ -177,6 +179,7 @@ def read(path, encoding="utf-8"): "chromadb": chromadb_libs, "milvus": milvus_libs, "weaviate": weaviate_libs, + "marqo": marqo_libs, "postgres": postgres_libs, "ludwig": ludwig_libs, "sklearn": sklearn_libs,