Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add marqo vector store integration #1443

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/_toc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ parts:
- file: source/reference/vector_databases/pinecone
- file: source/reference/vector_databases/milvus
- file: source/reference/vector_databases/weaviate
- file: source/reference/vector_databases/marqo

- file: source/reference/ai/index
title: AI Engines
Expand Down
27 changes: 27 additions & 0 deletions docs/source/reference/vector_databases/marqo.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Marqo
==========

Marqo is more than a vector database, it's an end-to-end vector search engine for both text and images.
Vector generation, storage and retrieval are handled out of the box through a single API.
Connection is based on the `marqo <https://github.com/marqo-ai/py-marqo>`_ client library.

Dependency
----------

* marqo==2.1.0


Setup
-----

To use marqo, you need a URL and API key to your instance.
Here are `instructions to setup local instance <https://github.com/marqo-ai/marqo#getting-started>`_.
Cloud offering can also be used.


Create Index
-----------------

.. code-block:: sql

CREATE INDEX index_name ON table_name (data) USING MARQO;
1 change: 1 addition & 0 deletions evadb/catalog/catalog_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ class VectorStoreType(EvaDBEnum):
CHROMADB # noqa: F821
WEAVIATE # noqa: F821
MILVUS # noqa: F821
MARQO # noqa: F821


class VideoColumnName(EvaDBEnum):
Expand Down
2 changes: 1 addition & 1 deletion evadb/interfaces/relational/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def create_vector_index(
table_name (str): Name of the table.
expr (str): Expression used to build the vector index.

using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `WEAVIATE` or `MILVUS`.
using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `WEAVIATE` or `MILVUS` or `MARQO`.

Returns:
EvaDBCursor: The EvaDBCursor object.
Expand Down
2 changes: 2 additions & 0 deletions evadb/parser/lark_visitor/_create_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,8 @@ def vector_store_type(self, tree):
vector_store_type = VectorStoreType.WEAVIATE
elif str.upper(token) == "MILVUS":
vector_store_type = VectorStoreType.MILVUS
elif str.upper(token) == "MARQO":
vector_store_type = VectorStoreType.MARQO
return vector_store_type


Expand Down
111 changes: 111 additions & 0 deletions evadb/third_party/vector_stores/marqo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List

from evadb.third_party.vector_stores.types import (
FeaturePayload,
VectorIndexQuery,
VectorIndexQueryResult,
VectorStore,
)
from evadb.utils.generic_utils import try_to_import_marqo_client

_marqo_client_instance = None

required_params = ["url", "index_name"]


def get_marqo_client(url: str, api_key: str = None):
global _marqo_client_instance
if _marqo_client_instance is None:
try_to_import_marqo_client()
import marqo as mq

_marqo_client_instance = mq.Client(url=url, api_key=api_key)
return _marqo_client_instance


class MarqoVectorStore(VectorStore):
def __init__(
self, index_name: str, url: str = "http://0.0.0.0:8882", api_key=None
) -> None:
self._client = get_marqo_client(url=url)
self._index_name = index_name

def create(self, vector_dim: int):

# Delete index if exists already
if self._index_name in [
i.index_name for i in self._client.get_indexes()["results"]
]:
self.delete()

# create fresh
# Refer here for details - https://docs.marqo.ai/2.0.0/API-Reference/Indexes/create_index/
self._client.create_index(
index_name=self._index_name,
settings_dict={
"index_defaults": {
"model": "no_model",
"model_properties": {"dimensions": vector_dim},
"normalize_embeddings": True,
"ann_parameters": {"space_type": "cosinesimil"},
}
},
)

def add(self, payload: List[FeaturePayload]):

ids = [int(row.id) for row in payload]
embeddings = [row.embedding for row in payload]

data = []
for _id, _emb in zip(ids, embeddings):
_id = str(_id)
data.append({"_id": _id, "evadb_data": {"vector": _emb}})

# For reference and more information
# check - https://docs.marqo.ai/1.4.0/Guides/Advanced-Usage/document_fields/#custom-vector-object
self._client.index(index_name=self._index_name).add_documents(
documents=data,
mappings={"evadb_data": {"type": "custom_vector"}},
tensor_fields=["evadb_data"],
auto_refresh=True,
client_batch_size=64,
)

def delete(self) -> None:
self._client.delete_index(index_name=self._index_name)

def query(
self,
query: VectorIndexQuery,
) -> VectorIndexQueryResult:
response = self._client.index(self._index_name).search(
context={
"tensor": [{"vector": list(query.embedding), "weight": 1}],
},
limit=query.top_k,
)

similarities, ids = [], []

for result in response["hits"]:
ids.append(result["_id"])

# Because it is similarity score
similarities.append(1 - result["_score"])

return VectorIndexQueryResult(similarities=similarities, ids=ids)
7 changes: 7 additions & 0 deletions evadb/third_party/vector_stores/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from evadb.catalog.catalog_type import VectorStoreType
from evadb.third_party.vector_stores.chromadb import ChromaDBVectorStore
from evadb.third_party.vector_stores.faiss import FaissVectorStore
from evadb.third_party.vector_stores.marqo import MarqoVectorStore
from evadb.third_party.vector_stores.milvus import MilvusVectorStore
from evadb.third_party.vector_stores.pinecone import PineconeVectorStore
from evadb.third_party.vector_stores.qdrant import QdrantVectorStore
Expand Down Expand Up @@ -67,5 +68,11 @@ def init_vector_store(
validate_kwargs(kwargs, allowed_params, required_params)
return MilvusVectorStore(index_name, **kwargs)

elif vector_store_type == VectorStoreType.MARQO:
from evadb.third_party.vector_stores.marqo import required_params

validate_kwargs(kwargs, required_params, required_params)
return MarqoVectorStore(index_name, **kwargs)

else:
raise Exception(f"Vector store {vector_store_type} not supported")
18 changes: 18 additions & 0 deletions evadb/utils/generic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,16 @@ def try_to_import_milvus_client():
)


def try_to_import_marqo_client():
try:
import marqo # noqa: F401
except ImportError:
raise ValueError(
"""Could not import marqo python package.
Please install it with `pip install marqo`."""
)


def is_qdrant_available() -> bool:
try:
try_to_import_qdrant_client()
Expand Down Expand Up @@ -633,6 +643,14 @@ def is_milvus_available() -> bool:
return False


def is_marqo_available() -> bool:
try:
try_to_import_marqo_client()
return True
except ValueError:
return False


##############################
## UTILS
##############################
Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ def read(path, encoding="utf-8"):

milvus_libs = ["pymilvus>=2.3.0"]

marqo_libs = ["marqo"]


postgres_libs = [
"psycopg2",
Expand Down Expand Up @@ -177,6 +179,7 @@ def read(path, encoding="utf-8"):
"chromadb": chromadb_libs,
"milvus": milvus_libs,
"weaviate": weaviate_libs,
"marqo": marqo_libs,
"postgres": postgres_libs,
"ludwig": ludwig_libs,
"sklearn": sklearn_libs,
Expand Down