Skip to content

Commit

Permalink
fix with comments
Browse files Browse the repository at this point in the history
Signed-off-by: cutecutecat <[email protected]>
  • Loading branch information
cutecutecat committed Jul 23, 2024
1 parent 59c5e8a commit c6acb93
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 64 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ class Item(models.Model):
fields=["embedding"],
opclasses=["vector_l2_ops"],
# don't pass any of `m`, `ef_construction`, `threads`, `quantization_type` or `quantization_ratio`
# if created by `with_option`, they will be overwritten
# if created by `with_option`, they will be overridden
).with_option(
IndexOption(index=Hnsw(m=16, ef_construction=100), threads=1)
),
Expand Down
21 changes: 21 additions & 0 deletions src/pgvecto_rs/django/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ def get_with_params(self):
return [f"options = $${option.dumps()}$$"]

def with_option(self, option: IndexOption):
"""
Fill a partially initialized HnswIndex object from `option`, overriding these arguments:
- m
- ef_construction
- threads
- quantization_type
- quantization_ratio
"""
if not isinstance(option.index, Hnsw):
raise IndexOptionTypeError(Hnsw, type(option.index))
self.m = option.index.m
Expand Down Expand Up @@ -126,6 +134,13 @@ def get_with_params(self):
return [f"options = $${option.dumps()}$$"]

def with_option(self, option: IndexOption):
"""
Fill a partially initialized IvfIndex object from `option`, overriding these arguments:
- nlist
- threads
- quantization_type
- quantization_ratio
"""
if not isinstance(option.index, Ivf):
raise IndexOptionTypeError(Ivf, type(option.index))
self.nlist = option.index.nlist
Expand Down Expand Up @@ -167,6 +182,12 @@ def get_with_params(self):
return [f"options = $${option.dumps()}$$"]

def with_option(self, option: IndexOption):
"""
Fill a partially initialized FlatIndex object from `option`, overriding these arguments:
- threads
- quantization_type
- quantization_ratio
"""
if not isinstance(option.index, Flat):
raise IndexOptionTypeError(Flat, type(option.index))
super().with_option(option)
Expand Down
5 changes: 5 additions & 0 deletions src/pgvecto_rs/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,8 @@ def __init__(self, required_type: type, dtype: type) -> None:
super().__init__(
f"the index requires IndexOption of {required_type} type, but got {dtype}"
)


class TextParseError(PGVectoRsError):
def __init__(self, payload: str, dtype: type) -> None:
super().__init__(f"failed to parse text of '{payload}' as a {dtype}")
12 changes: 8 additions & 4 deletions src/pgvecto_rs/types/bvector.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import numpy as np

from pgvecto_rs.errors import NDArrayDimensionError, ToDBDimUnequalError
from pgvecto_rs.errors import NDArrayDimensionError, TextParseError, ToDBDimUnequalError


class BinaryVector:
Expand Down Expand Up @@ -42,14 +42,18 @@ def to_binary(self):

@classmethod
def from_text(cls, value):
return cls([int(v) for v in value[1:-1].split(",")])
left, right = value.find("["), value.rfind("]")
if left == -1 or right == -1 or left > right:
raise TextParseError(value, cls)
return cls([int(v) for v in value[left + 1 : right].split(",")])

@classmethod
def from_binary(cls, value):
view = memoryview(value)
# start reading the buffer from the 3rd byte (the first 2 bytes hold the dimension info)
dim = unpack("<H", value[:2])[0]
dim = unpack("<H", view[:2])[0]
length = math.ceil(dim / 64)
data = np.frombuffer(value, dtype="<u8", count=length, offset=2).view(np.uint8)
data = np.frombuffer(view, dtype="<u8", count=length, offset=2).view(np.uint8)
return cls(np.unpackbits(data, bitorder="little", count=dim))

@classmethod
Expand Down
22 changes: 13 additions & 9 deletions src/pgvecto_rs/types/svector.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
SparseExtraArgError,
SparseMissingArgError,
SparseShapeError,
TextParseError,
ToDBDimUnequalError,
)

Expand Down Expand Up @@ -142,30 +143,33 @@ def _from_dense(self, value):
self._values = [float(value[i]) for i in self._indices]

@classmethod
def from_text(cls, value):
def from_text(cls, value: str):
elements, dim = value.split("/", 2)
left, right = elements.find("{"), elements.rfind("}")
if left == -1 or right == -1 or left > right:
raise TextParseError(value, cls)
indices = []
values = []
for e in elements[1:-1].split(","):
for e in elements[left + 1 : right].split(","):
i, v = e.split(":", 2)
indices.append(int(i))
values.append(float(v))
return cls._from_parts(int(dim), indices, values)

@classmethod
def from_binary(cls, value):
view = memoryview(value)
# unpack dims and length as little-endian uint32, matching the endianness used by pgvecto.rs
dims = unpack("<I", value[:4])[0]
length = unpack("<I", value[4:8])[0]
bytes = value[8:]
dims = unpack("<I", view[:4])[0]
length = unpack("<I", view[4:8])[0]
bytes = view[8:]
# unpack indices and values as little-endian uint32 and float32, matching the endianness used by pgvecto.rs
indices = np.frombuffer(bytes, dtype="<I", count=length, offset=0).astype(
np.uint32
)
bytes = bytes[4 * length :]
values = np.frombuffer(bytes, dtype="<f", count=length, offset=0).astype(
np.float32
)
values = np.frombuffer(
bytes, dtype="<f", count=length, offset=4 * length
).astype(np.float32)
return cls.from_parts(dims, indices, values)

@classmethod
Expand Down
51 changes: 4 additions & 47 deletions src/pgvecto_rs/types/vecf16.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from struct import pack, unpack
from struct import unpack

import numpy as np

from pgvecto_rs.errors import NDArrayDimensionError, ToDBDimUnequalError
from pgvecto_rs.errors import NDArrayDimensionError
from pgvecto_rs.types import Vector


class Float16Vector:
class Float16Vector(Vector):
def __init__(self, value):
# asarray still copies if same dtype
if not isinstance(value, np.ndarray) or value.dtype != "<f2":
Expand All @@ -19,27 +20,6 @@ def __init__(self, value):
def __repr__(self):
return f"Float16Vector({self.to_list()})"

def dimensions(self):
return len(self._value)

def to_list(self):
return self._value.tolist()

def to_numpy(self):
return self._value

def to_text(self):
return "[" + ",".join([str(float(v)) for v in self._value]) + "]"

def to_binary(self):
# pack to little-endian uint16, keep same endian with pgvecto.rs
dims: bytes = pack("<H", self._value.shape[0])
return dims + self._value.tobytes()

@classmethod
def from_text(cls, value):
return cls([float(v) for v in value[1:-1].split(",")])

@classmethod
def from_binary(cls, value):
dim = unpack("<H", value[:2])[0]
Expand All @@ -48,29 +28,6 @@ def from_binary(cls, value):
np.frombuffer(value, dtype="<f2", count=dim, offset=2).astype(np.float16)
)

@classmethod
def _to_db(cls, value, dim=None):
if value is None:
return value

if not isinstance(value, cls):
value = cls(value)

if dim is not None and value.dimensions() != dim:
raise ToDBDimUnequalError(dim, value.dimensions())

return value.to_text()

@classmethod
def _to_db_binary(cls, value):
if value is None:
return value

if not isinstance(value, cls):
value = cls(value)

return value.to_binary()

@classmethod
def _from_db(cls, value):
if value is None or isinstance(value, cls):
Expand Down
10 changes: 7 additions & 3 deletions src/pgvecto_rs/types/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,18 @@ def to_binary(self):

@classmethod
def from_text(cls, value):
return cls([float(v) for v in value[1:-1].split(",")])
left, right = value.find("["), value.rfind("]")
if left == -1 or right == -1 or left > right:
raise ValueError
return cls([float(v) for v in value[left + 1 : right].split(",")])

@classmethod
def from_binary(cls, value):
dim = unpack("<H", value[:2])[0]
view = memoryview(value)
dim = unpack("<H", view[:2])[0]
# start reading the buffer from the 3rd byte (the first 2 bytes hold the dimension info)
return cls(
np.frombuffer(value, dtype="<f", count=dim, offset=2).astype(np.float32)
np.frombuffer(view, dtype="<f", count=dim, offset=2).astype(np.float32)
)

@classmethod
Expand Down

0 comments on commit c6acb93

Please sign in to comment.