feat: support qdrant #5

Merged (2 commits) on Feb 1, 2024
README.md (7 additions, 2 deletions)
@@ -4,12 +4,17 @@ Supported databases/extensions:

- [x] [`pgvecto.rs`](https://github.com/tensorchord/pgvecto.rs)
- [x] [`pgvector`](https://github.com/pgvector/pgvector)
- [ ] [`qdrant`](https://github.com/qdrant/qdrant/)
- [x] [`qdrant`](https://github.com/qdrant/qdrant/)

Supported datasets:

- [x] randomly generated
- [x] GIST 960
- [x] GLOVE
- [x] Deep Image
- [x] LAION

For more information, check the [source.py](./vector_bench/dataset/source.py).

## Installation

@@ -24,7 +29,7 @@ pip install vector_bench
Run the docker compose file under [`server`](server/) folder.

```bash
cd server/pgvecto.rs && docker compose up -d
docker compose -f docker/compose.${DB_NAME}.yaml up -d
```
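With the Qdrant compose file added later in this diff, `DB_NAME` resolves to `qdrant`; the file names for the two Postgres-based setups are not visible here. A minimal sketch:

```bash
# Start only the Qdrant server; its compose file is added in this PR.
export DB_NAME=qdrant
docker compose -f docker/compose.${DB_NAME}.yaml up -d
```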

### Client
@@ -1,7 +1,7 @@
services:
  pgvector:
  pgvectors:
    image: tensorchord/pgvecto-rs:pg15-v0.1.13
    container_name: pgvector
    container_name: pgvectors
    environment:
      - POSTGRES_USER=postgres
      - POSTGRES_PASSWORD=password
@@ -1,6 +1,6 @@
services:
  pgvector:
    image: ankane/pgvector:v0.5.1
    image: pgvector/pgvector:0.6.0-pg15
    container_name: pgvector
    environment:
      - POSTGRES_USER=postgres
docker/compose.qdrant.yaml (17 additions, 0 deletions)
@@ -0,0 +1,17 @@
services:
  qdrant:
    image: qdrant/qdrant:v1.7.4
    container_name: qdrant
    ports:
      - "6333:6333"
    logging:
      driver: "json-file"
      options:
        max-file: "1"
        max-size: "10m"
    deploy:
      resources:
        limits:
          cpus: "8"
        reservations:
          cpus: "4"
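Not part of the change, but a quick way to confirm the container is serving requests; `/collections` is a standard Qdrant REST endpoint, and `httpx` is already a project dependency:

```python
# Illustrative liveness check for the Qdrant container started from the compose file above.
import httpx

resp = httpx.get("http://localhost:6333/collections", timeout=5.0)
resp.raise_for_status()
print(resp.json())  # a fresh instance reports an empty collection list
```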
pyproject.toml (1 addition, 0 deletions)
@@ -16,6 +16,7 @@ dependencies = [
"tqdm~=4.66",
"httpx~=0.25",
"psycopg[binary]~=3.1",
"qdrant-client~=1.7.1",
]
[project.optional-dependencies]
dev = [
vector_bench/client/__init__.py (2 additions, 0 deletions)
@@ -1,8 +1,10 @@
from vector_bench.client.pgvecto_rs import PgVectorsClient
from vector_bench.client.pgvector import PgvectorClient
from vector_bench.client.qdrant import QdrantVectorClient
from vector_bench.spec import EnumSelector


class DataBaseClient(EnumSelector):
    PGVECTO_RS = PgVectorsClient
    PGVECTOR = PgvectorClient
    QDRANT = QdrantVectorClient
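A rough sketch of how the new member might be selected, assuming `EnumSelector` allows ordinary `Enum`-style lookup by name (its actual helpers are not shown in this diff):

```python
# Hypothetical lookup; EnumSelector's real API may differ.
from vector_bench.client import DataBaseClient

client_cls = DataBaseClient["QDRANT"].value  # expected to be QdrantVectorClient
```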
vector_bench/client/base.py (1 addition, 1 deletion)
@@ -11,7 +11,7 @@ def insert_batch(self, records: list[Record]):
        pass

    @abc.abstractmethod
    def query(self, vector: list[float], top_k: int = 10):
    def query(self, vector: list[float], top_k: int = 10) -> list[Record]:
        pass

    @abc.abstractclassmethod
vector_bench/client/pgvecto_rs.py (2 additions, 15 deletions)
@@ -16,8 +16,8 @@
class VectorDumper(Dumper):
    def dump(self, obj):
        if isinstance(obj, np.ndarray):
            obj = f"[{','.join(map(str, obj))}]"
        return str(obj).replace(" ", "")
            return f"[{','.join(map(str, obj))}]".encode()
        return str(obj).replace(" ", "").encode()


class VectorLoader(Loader):
@@ -152,19 +152,6 @@ def indexing(self):
            conn.execute(self.sql_create_index)
            conn.commit()

    async def insert(self, record: Record):
        async with await psycopg.AsyncConnection.connect(self.url) as conn:
            register_vector_async(conn)
            await conn.execute(
                self.sql_insert,
                (
                    record.id,
                    record.vector,
                    Jsonb(record.metadata or {}, dumps=msgspec.json.encode),
                ),
            )
            await conn.commit()

    def insert_batch(self, records: list[Record]):
        with psycopg.connect(self.url) as conn:
            register_vector(conn)
vector_bench/client/pgvector.py (2 additions, 2 deletions)
@@ -69,7 +69,7 @@ def to_db_binary(value):

class VectorDumper(Dumper):
    def dump(self, obj):
        return to_db(obj).encode("utf8")
        return to_db(obj).encode()


class VectorBinaryDumper(VectorDumper):
@@ -83,7 +83,7 @@ class VectorLoader(Loader):
    def load(self, data):
        if isinstance(data, memoryview):
            data = bytes(data)
        return from_db(data.decode("utf8"))
        return from_db(data.decode())


class VectorBinaryLoader(VectorLoader):
vector_bench/client/qdrant.py (70 additions, 0 deletions)
@@ -0,0 +1,70 @@
from __future__ import annotations

from qdrant_client import QdrantClient
from qdrant_client.models import Distance as QdrantDistance
from qdrant_client.models import PointStruct, ScoredPoint, VectorParams

from vector_bench.client.base import BaseClient
from vector_bench.spec import DatabaseConfig, Distance, Record

DISTANCE_TO_QDRANT = {
    Distance.COSINE: QdrantDistance.COSINE,
    Distance.EUCLIDEAN: QdrantDistance.EUCLID,
    Distance.DOT_PRODUCT: QdrantDistance.DOT,
}


class QdrantVectorClient(BaseClient):
    dim: int
    url: str
    table: str
    distance: Distance

    @classmethod
    def from_config(cls, config: DatabaseConfig) -> QdrantVectorClient:
        cls.dim = config.vector_dim
        cls.url = config.url
        cls.table = f"{config.table}_qdrant"
        cls.distance = config.distance

        cls = QdrantVectorClient()
        cls.init_db()
        return cls

    def init_db(self):
        self.client = QdrantClient(url=self.url)
        collections_response = self.client.get_collections()
        for collection in collections_response.collections:
            if collection.name == self.table:
                # already exists, return
                return

        self.client.create_collection(
            collection_name=self.table,
            vectors_config=VectorParams(
                size=self.dim,
                distance=DISTANCE_TO_QDRANT[self.distance.__func__],
            ),
        )

    def insert_batch(self, records: list[Record]):
        self.client.upsert(
            collection_name=self.table,
            points=[
                PointStruct(
                    id=record.id, vector=record.vector.tolist(), payload=record.metadata
                )
                for record in records
            ],
        )

    def query(self, vector: list[float], top_k: int = 10) -> list[Record]:
        points: list[ScoredPoint] = self.client.search(
            collection_name=self.table,
            query_vector=vector,
            limit=top_k,
        )
        return [
            Record(id=point.id, vector=point.vector, metadata=point.payload)
            for point in points
        ]
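A small end-to-end sketch of how this client would be exercised; the keyword constructors for `DatabaseConfig` and `Record` are inferred from the fields used in this file and may not match their real signatures:

```python
# Illustrative only: field names come from this diff, constructor shapes are assumed.
import numpy as np

from vector_bench.client.qdrant import QdrantVectorClient
from vector_bench.spec import DatabaseConfig, Distance, Record

config = DatabaseConfig(
    url="http://localhost:6333",
    table="bench",
    vector_dim=960,
    distance=Distance.EUCLIDEAN,
)
client = QdrantVectorClient.from_config(config)  # creates the collection if missing
client.insert_batch(
    [Record(id=i, vector=np.random.rand(960), metadata={}) for i in range(1_000)]
)
hits = client.query(np.random.rand(960).tolist(), top_k=10)
```

One review note: `client.search` is called without `with_vectors=True`, so `point.vector` in the returned `Record`s will typically be `None` unless stored vectors are explicitly requested.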
vector_bench/dataset/source.py (40 additions, 0 deletions)
@@ -11,6 +11,42 @@
    link="https://ann-benchmarks.com/gist-960-euclidean.hdf5",
)

GLOVE_25_COSINE = DatasetConfig(
    vector_dim=25,
    num=1_200_000,
    distance=Distance.COSINE,
    type=FileType.H5,
    path="datasets/glove-25-angular.hdf5",
    link="https://ann-benchmarks.com/glove-25-angular.hdf5",
)

GLOVE_100_COSINE = DatasetConfig(
    vector_dim=100,
    num=1_200_000,
    distance=Distance.COSINE,
    type=FileType.H5,
    path="datasets/glove-100-angular.hdf5",
    link="https://ann-benchmarks.com/glove-100-angular.hdf5",
)

DEEP_96_COSINE = DatasetConfig(
    vector_dim=96,
    num=10_000_000,
    distance=Distance.COSINE,
    type=FileType.H5,
    path="datasets/deep-image-96-angular.hdf5",
    link="https://ann-benchmarks.com/deep-image-96-angular.hdf5",
)

LAION_768_DOT_PRODUCT = DatasetConfig(
    vector_dim=512,
    num=5_000_000,
    distance=Distance.DOT_PRODUCT,
    type=FileType.H5,
    path="datasets/laion-768-ip.hdf5",
    link="https://myscale-datasets.s3.ap-southeast-1.amazonaws.com/laion-5m-test-ip.hdf5",
)

RANDOM_128_L2 = DatasetConfig(
    vector_dim=128,
    num=100_000,
@@ -23,4 +59,8 @@

class DataSource(EnumSelector):
    GIST_960_L2 = GIST_960_L2
    GLOVE_25_COSINE = GLOVE_25_COSINE
    GLOVE_100_COSINE = GLOVE_100_COSINE
    DEEP_96_COSINE = DEEP_96_COSINE
    LAION_768_DOT_PRODUCT = LAION_768_DOT_PRODUCT
    RANDOM_128_L2 = RANDOM_128_L2
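The dataset entries all follow the same shape, so adding another one is mechanical. A hypothetical example for the public ann-benchmarks SIFT file (not part of this PR):

```python
# Hypothetical addition mirroring the configs above; values describe the public
# sift-128-euclidean.hdf5 dataset, not anything introduced by this PR.
SIFT_128_L2 = DatasetConfig(
    vector_dim=128,
    num=1_000_000,
    distance=Distance.EUCLIDEAN,
    type=FileType.H5,
    path="datasets/sift-128-euclidean.hdf5",
    link="https://ann-benchmarks.com/sift-128-euclidean.hdf5",
)
```

together with a matching `SIFT_128_L2 = SIFT_128_L2` member on `DataSource`.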