feat: support qdrant (#5)
* feat: support qdrant

Signed-off-by: Keming <[email protected]>

* add more dataset

Signed-off-by: Keming <[email protected]>

---------

Signed-off-by: Keming <[email protected]>
kemingy authored Feb 1, 2024
1 parent 48b4906 commit c10c99e
Showing 11 changed files with 145 additions and 23 deletions.
9 changes: 7 additions & 2 deletions README.md
@@ -4,12 +4,17 @@ Supported databases/extensions:

- [x] [`pgvecto.rs`](https://github.com/tensorchord/pgvecto.rs)
- [x] [`pgvector`](https://github.com/pgvector/pgvector)
- [ ] [`qdrant`](https://github.com/qdrant/qdrant/)
- [x] [`qdrant`](https://github.com/qdrant/qdrant/)

Supported datasets:

- [x] randomly generated
- [x] GIST 960
- [x] GLOVE
- [x] Deep Image
- [x] LAION

For more information, check the [source.py](./vector_bench/dataset/source.py).

## Installation

@@ -24,7 +29,7 @@ pip install vector_bench
Run the docker compose file under the [`server`](server/) folder.

```bash
cd server/pgvecto.rs && docker compose up -d
docker compose -f docker/compose.${DB_NAME}.yaml up -d
```
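
For example, with `DB_NAME=qdrant` this expands to `docker compose -f docker/compose.qdrant.yaml up -d`, which starts the Qdrant service added in this commit.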

### Client
@@ -1,7 +1,7 @@
services:
  pgvector:
  pgvectors:
    image: tensorchord/pgvecto-rs:pg15-v0.1.13
    container_name: pgvector
    container_name: pgvectors
    environment:
      - POSTGRES_USER=postgres
      - POSTGRES_PASSWORD=password
@@ -1,6 +1,6 @@
services:
  pgvector:
    image: ankane/pgvector:v0.5.1
    image: pgvector/pgvector:0.6.0-pg15
    container_name: pgvector
    environment:
      - POSTGRES_USER=postgres
17 changes: 17 additions & 0 deletions docker/compose.qdrant.yaml
@@ -0,0 +1,17 @@
services:
  qdrant:
    image: qdrant/qdrant:v1.7.4
    container_name: qdrant
    ports:
      - "6333:6333"
    logging:
      driver: "json-file"
      options:
        max-file: "1"
        max-size: "10m"
    deploy:
      resources:
        limits:
          cpus: "8"
        reservations:
          cpus: "4"
1 change: 1 addition & 0 deletions pyproject.toml
@@ -16,6 +16,7 @@ dependencies = [
    "tqdm~=4.66",
    "httpx~=0.25",
    "psycopg[binary]~=3.1",
    "qdrant-client~=1.7.1",
]
[project.optional-dependencies]
dev = [
Expand Down
2 changes: 2 additions & 0 deletions vector_bench/client/__init__.py
@@ -1,8 +1,10 @@
from vector_bench.client.pgvecto_rs import PgVectorsClient
from vector_bench.client.pgvector import PgvectorClient
from vector_bench.client.qdrant import QdrantVectorClient
from vector_bench.spec import EnumSelector


class DataBaseClient(EnumSelector):
    PGVECTO_RS = PgVectorsClient
    PGVECTOR = PgvectorClient
    QDRANT = QdrantVectorClient
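
A minimal sketch of selecting the new backend through this enum, assuming `DataBaseClient` behaves like a standard `Enum` (so `.value` yields the client class) and that `DatabaseConfig` accepts the `url`, `table`, `vector_dim`, and `distance` fields that `from_config` reads; the concrete values below are placeholders:

```python
from vector_bench.client import DataBaseClient
from vector_bench.spec import DatabaseConfig, Distance

# Placeholder config; field names mirror what QdrantVectorClient.from_config reads.
config = DatabaseConfig(
    url="http://localhost:6333",
    table="benchmark",
    vector_dim=960,
    distance=Distance.EUCLIDEAN,
)

client_cls = DataBaseClient.QDRANT.value  # -> QdrantVectorClient
client = client_cls.from_config(config)
```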
2 changes: 1 addition & 1 deletion vector_bench/client/base.py
@@ -11,7 +11,7 @@ def insert_batch(self, records: list[Record]):
        pass

    @abc.abstractmethod
    def query(self, vector: list[float], top_k: int = 10):
    def query(self, vector: list[float], top_k: int = 10) -> list[Record]:
        pass

    @abc.abstractclassmethod
17 changes: 2 additions & 15 deletions vector_bench/client/pgvecto_rs.py
@@ -16,8 +16,8 @@
class VectorDumper(Dumper):
    def dump(self, obj):
        if isinstance(obj, np.ndarray):
            obj = f"[{','.join(map(str, obj))}]"
        return str(obj).replace(" ", "")
            return f"[{','.join(map(str, obj))}]".encode()
        return str(obj).replace(" ", "").encode()


class VectorLoader(Loader):
@@ -152,19 +152,6 @@ def indexing(self):
            conn.execute(self.sql_create_index)
            conn.commit()

    async def insert(self, record: Record):
        async with await psycopg.AsyncConnection.connect(self.url) as conn:
            register_vector_async(conn)
            await conn.execute(
                self.sql_insert,
                (
                    record.id,
                    record.vector,
                    Jsonb(record.metadata or {}, dumps=msgspec.json.encode),
                ),
            )
            await conn.commit()

    def insert_batch(self, records: list[Record]):
        with psycopg.connect(self.url) as conn:
            register_vector(conn)
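The `VectorDumper` change above makes `dump` return `bytes` instead of `str`, matching what psycopg 3 dumpers are expected to produce. A standalone sketch of the serialization it performs:

```python
import numpy as np

def dump_vector(obj) -> bytes:
    # Mirrors the updated VectorDumper.dump: a numpy array becomes a
    # bracketed, comma-separated vector literal encoded to bytes.
    if isinstance(obj, np.ndarray):
        return f"[{','.join(map(str, obj))}]".encode()
    return str(obj).replace(" ", "").encode()

print(dump_vector(np.array([1.0, 2.5, 3.0])))  # b'[1.0,2.5,3.0]'
```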
4 changes: 2 additions & 2 deletions vector_bench/client/pgvector.py
@@ -69,7 +69,7 @@ def to_db_binary(value):

class VectorDumper(Dumper):
    def dump(self, obj):
        return to_db(obj).encode("utf8")
        return to_db(obj).encode()


class VectorBinaryDumper(VectorDumper):
@@ -83,7 +83,7 @@ class VectorLoader(Loader):
    def load(self, data):
        if isinstance(data, memoryview):
            data = bytes(data)
        return from_db(data.decode("utf8"))
        return from_db(data.decode())


class VectorBinaryLoader(VectorLoader):
70 changes: 70 additions & 0 deletions vector_bench/client/qdrant.py
@@ -0,0 +1,70 @@
from __future__ import annotations

from qdrant_client import QdrantClient
from qdrant_client.models import Distance as QdrantDistance
from qdrant_client.models import PointStruct, ScoredPoint, VectorParams

from vector_bench.client.base import BaseClient
from vector_bench.spec import DatabaseConfig, Distance, Record

DISTANCE_TO_QDRANT = {
    Distance.COSINE: QdrantDistance.COSINE,
    Distance.EUCLIDEAN: QdrantDistance.EUCLID,
    Distance.DOT_PRODUCT: QdrantDistance.DOT,
}


class QdrantVectorClient(BaseClient):
    dim: int
    url: str
    table: str
    distance: Distance

    @classmethod
    def from_config(cls, config: DatabaseConfig) -> QdrantVectorClient:
        cls.dim = config.vector_dim
        cls.url = config.url
        cls.table = f"{config.table}_qdrant"
        cls.distance = config.distance

        cls = QdrantVectorClient()
        cls.init_db()
        return cls

    def init_db(self):
        self.client = QdrantClient(url=self.url)
        collections_response = self.client.get_collections()
        for collection in collections_response.collections:
            if collection.name == self.table:
                # already exists, return
                return

        self.client.create_collection(
            collection_name=self.table,
            vectors_config=VectorParams(
                size=self.dim,
                distance=DISTANCE_TO_QDRANT[self.distance.__func__],
            ),
        )

    def insert_batch(self, records: list[Record]):
        self.client.upsert(
            collection_name=self.table,
            points=[
                PointStruct(
                    id=record.id, vector=record.vector.tolist(), payload=record.metadata
                )
                for record in records
            ],
        )

    def query(self, vector: list[float], top_k: int = 10) -> list[Record]:
        points: list[ScoredPoint] = self.client.search(
            collection_name=self.table,
            query_vector=vector,
            limit=top_k,
        )
        return [
            Record(id=point.id, vector=point.vector, metadata=point.payload)
            for point in points
        ]
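
A usage sketch for the new client, assuming a Qdrant instance is reachable at the URL below; the `DatabaseConfig` and `Record` field names are taken from how `from_config`, `insert_batch`, and `query` access them above, and the concrete values are placeholders:

```python
import numpy as np

from vector_bench.client.qdrant import QdrantVectorClient
from vector_bench.spec import DatabaseConfig, Distance, Record

config = DatabaseConfig(
    url="http://localhost:6333",
    table="benchmark",
    vector_dim=4,
    distance=Distance.COSINE,
)
client = QdrantVectorClient.from_config(config)  # creates the collection if missing

# insert_batch calls record.vector.tolist(), so vectors are numpy arrays here.
records = [
    Record(id=i, vector=np.random.rand(4).astype(np.float32), metadata={"i": i})
    for i in range(100)
]
client.insert_batch(records)

# query returns list[Record] built from the scored points.
for record in client.query([0.1, 0.2, 0.3, 0.4], top_k=5):
    print(record.id, record.metadata)
```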
40 changes: 40 additions & 0 deletions vector_bench/dataset/source.py
@@ -11,6 +11,42 @@
    link="https://ann-benchmarks.com/gist-960-euclidean.hdf5",
)

GLOVE_25_COSINE = DatasetConfig(
    vector_dim=25,
    num=1_200_000,
    distance=Distance.COSINE,
    type=FileType.H5,
    path="datasets/glove-25-angular.hdf5",
    link="https://ann-benchmarks.com/glove-25-angular.hdf5",
)

GLOVE_100_COSINE = DatasetConfig(
    vector_dim=100,
    num=1_200_000,
    distance=Distance.COSINE,
    type=FileType.H5,
    path="datasets/glove-100-angular.hdf5",
    link="https://ann-benchmarks.com/glove-100-angular.hdf5",
)

DEEP_96_COSINE = DatasetConfig(
    vector_dim=96,
    num=10_000_000,
    distance=Distance.COSINE,
    type=FileType.H5,
    path="datasets/deep-image-96-angular.hdf5",
    link="https://ann-benchmarks.com/deep-image-96-angular.hdf5",
)

LAION_768_DOT_PRODUCT = DatasetConfig(
    vector_dim=768,
    num=5_000_000,
    distance=Distance.DOT_PRODUCT,
    type=FileType.H5,
    path="datasets/laion-768-ip.hdf5",
    link="https://myscale-datasets.s3.ap-southeast-1.amazonaws.com/laion-5m-test-ip.hdf5",
)

RANDOM_128_L2 = DatasetConfig(
    vector_dim=128,
    num=100_000,
@@ -23,4 +59,8 @@

class DataSource(EnumSelector):
    GIST_960_L2 = GIST_960_L2
    GLOVE_25_COSINE = GLOVE_25_COSINE
    GLOVE_100_COSINE = GLOVE_100_COSINE
    DEEP_96_COSINE = DEEP_96_COSINE
    LAION_768_DOT_PRODUCT = LAION_768_DOT_PRODUCT
    RANDOM_128_L2 = RANDOM_128_L2
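
A small sketch of picking one of the newly registered datasets, assuming `DataSource` behaves like a plain `Enum` whose member values are the `DatasetConfig` constants above (downloading and loading the HDF5 file is not shown here):

```python
from vector_bench.dataset.source import DataSource

config = DataSource.GLOVE_25_COSINE.value  # -> the GLOVE_25_COSINE DatasetConfig
print(config.vector_dim, config.num, config.link)
# 25 1200000 https://ann-benchmarks.com/glove-25-angular.hdf5
```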
