Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add marqo vector store integration #1443

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions evadb/catalog/catalog_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ class VectorStoreType(EvaDBEnum):
CHROMADB # noqa: F821
WEAVIATE # noqa: F821
MILVUS # noqa: F821
MARQO # noqa: F821


class VideoColumnName(EvaDBEnum):
Expand Down
2 changes: 1 addition & 1 deletion evadb/interfaces/relational/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def create_vector_index(
table_name (str): Name of the table.
expr (str): Expression used to build the vector index.

using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `WEAVIATE` or `MILVUS`.
using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `WEAVIATE` or `MILVUS` or `MARQO`.

Returns:
EvaDBCursor: The EvaDBCursor object.
Expand Down
2 changes: 2 additions & 0 deletions evadb/parser/lark_visitor/_create_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,8 @@ def vector_store_type(self, tree):
vector_store_type = VectorStoreType.WEAVIATE
elif str.upper(token) == "MILVUS":
vector_store_type = VectorStoreType.MILVUS
elif str.upper(token) == "MARQO":
vector_store_type = VectorStoreType.MARQO
return vector_store_type


Expand Down
118 changes: 118 additions & 0 deletions evadb/third_party/vector_stores/marqo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
from typing import List

from evadb.third_party.vector_stores.types import (
FeaturePayload,
VectorIndexQuery,
VectorIndexQueryResult,
VectorStore,
)
from evadb.utils.generic_utils import try_to_import_marqo_client

_marqo_client_instance = None

required_params = ["url", "index_name"]


def get_marqo_client(url: str, api_key: str=None):
global _marqo_client_instance
if _marqo_client_instance is None:
try_to_import_marqo_client()
import marqo as mq
_marqo_client_instance = mq.Client(url=url, api_key=api_key)
return _marqo_client_instance


class MarqoVectorStore(VectorStore):
def __init__(self, index_name: str, url: str = "http://0.0.0.0:8882", api_key=None) -> None:
self._client = get_marqo_client(url=url)
self._index_name = index_name

def create(self, vector_dim: int):

# Delete index if exists already
if self._index_name in [i.index_name for i in self._client.get_indexes()['results']]:
self.delete()

# create fresh
# Refer here for details - https://docs.marqo.ai/2.0.0/API-Reference/Indexes/create_index/
self._client.create_index(
index_name=self._index_name,
settings_dict={
'index_defaults': {
'model': 'no_model',
'model_properties': {
'dimensions': vector_dim
},

'normalize_embeddings': True,
'ann_parameters':{
'space_type': 'cosinesimil'
}
}
}
)

def add(self, payload: List[FeaturePayload]):

ids = [int(row.id) for row in payload]
embeddings = [row.embedding for row in payload]

data = []
for _id, _emb in zip(ids, embeddings):
_id = str(_id)
data.append(
{
'_id': _id,
'evadb_data':{
'vector': _emb
}
}
)

# For reference and more information
# check - https://docs.marqo.ai/1.4.0/Guides/Advanced-Usage/document_fields/#custom-vector-object
self._client.index(
index_name=self._index_name
).add_documents(
documents=data,
mappings={
'evadb_data':{
'type': 'custom_vector'
}
},
tensor_fields=['evadb_data'],
auto_refresh=True,
client_batch_size=64
)


def delete(self) -> None:
self._client.delete_index(index_name=self._index_name)

def query(
self,
query: VectorIndexQuery,
) -> VectorIndexQueryResult:
response = self._client.index(
self._index_name).search(
context={
'tensor':[
{
'vector': list(query.embedding),
'weight' : 1
}
],
},
limit=query.top_k
)

similarities, ids = [], []

for result in response['hits']:
ids.append(result['_id'])

# Because it is similarity score
similarities.append(1-result['_score'])

return VectorIndexQueryResult(similarities=similarities, ids=ids)

7 changes: 7 additions & 0 deletions evadb/third_party/vector_stores/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from evadb.catalog.catalog_type import VectorStoreType
from evadb.third_party.vector_stores.chromadb import ChromaDBVectorStore
from evadb.third_party.vector_stores.faiss import FaissVectorStore
from evadb.third_party.vector_stores.marqo import MarqoVectorStore
from evadb.third_party.vector_stores.milvus import MilvusVectorStore
from evadb.third_party.vector_stores.pinecone import PineconeVectorStore
from evadb.third_party.vector_stores.qdrant import QdrantVectorStore
Expand Down Expand Up @@ -67,5 +68,11 @@ def init_vector_store(
validate_kwargs(kwargs, allowed_params, required_params)
return MilvusVectorStore(index_name, **kwargs)

elif vector_store_type == VectorStoreType.MARQO:
from evadb.third_party.vector_stores.marqo import required_params

validate_kwargs(kwargs, required_params, required_params)
return MarqoVectorStore(index_name, **kwargs)

else:
raise Exception(f"Vector store {vector_store_type} not supported")
18 changes: 18 additions & 0 deletions evadb/utils/generic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,16 @@ def try_to_import_milvus_client():
)


def try_to_import_marqo_client():
try:
import marqo # noqa: F401
except ImportError:
raise ValueError(
"""Could not import marqo python package.
Please install it with `pip install marqo`."""
)


def is_qdrant_available() -> bool:
try:
try_to_import_qdrant_client()
Expand Down Expand Up @@ -633,6 +643,14 @@ def is_milvus_available() -> bool:
return False


def is_marqo_available() -> bool:
try:
try_to_import_marqo_client()
return True
except ValueError:
return False


##############################
## UTILS
##############################
Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ def read(path, encoding="utf-8"):

milvus_libs = ["pymilvus>=2.3.0"]

marqo_libs = ["marqo"]


postgres_libs = [
"psycopg2",
Expand Down Expand Up @@ -177,6 +179,7 @@ def read(path, encoding="utf-8"):
"chromadb": chromadb_libs,
"milvus": milvus_libs,
"weaviate": weaviate_libs,
"marqo": marqo_libs,
"postgres": postgres_libs,
"ludwig": ludwig_libs,
"sklearn": sklearn_libs,
Expand Down