-
Notifications
You must be signed in to change notification settings - Fork 73
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Python native SDK (without SQL) #528
Comments
Design of native SDK
Manage Databases
Abilities
from sdk import PGVectoClient
client = PGVectoClient(host="127.0.0.1", port=19530, user_name="postgres", db_name="postgres", password="")
Manage Schema
Concept
Supported data types:
Column attributes:
Abilities
from sdk import Field, VectorField, Schema, DataType
id_field = Field(name="id", dtype=DataType.INT, is_primary=True, description="primary id")
age_field = Field(name="age", dtype=DataType.INT, description="age")
embedding_field = VectorField(name="embedding", dtype=DataType.VECTOR, dim=128, description="vector")
position_field = Field(name="position", dtype=DataType.TEXT)
schema = Schema(fields=[id_field, age_field, embedding_field], auto_id=False, description="desc of a collection", partition=None)
Manage Collections
Concept
Abilities
# Quick setup mode without schema, with columns: id(int), vector(Vector) and meta(jsonb)
client.create_basic_collection(
collection_name="quick_setup",
dimension=5,
)
# Custom mode: create columns by schema
client.create_collection(
collection_name="customized_setup",
schema=schema,
)
client.drop_collection(
collection_name="customized_setup"
)
Data Insert
Abilities
data=[
{"id": 0, "vector": [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592], "color": "pink_8682"},
{"id": 1, "vector": [0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104], "color": "red_7025"},
]
client.insert(
collection_name="quick_setup",
data=data
)
Update and Delete
Abilities
# UPDATE table SET ... WHERE id=3;
# INSERT INTO table (id, ...)
# SELECT ...
# WHERE NOT EXISTS (SELECT 1 FROM table WHERE id=3);
# Insert if id doesn't exist, else update
res = client.upsert(
collection_name='quick_setup',
data=data
)
# UPDATE table SET ... WHERE color = 'pink_8682';
res = client.update(
collection_name='quick_setup',
data= {"vector": [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592], "color": "pink_8682"},
filter="color = \"pink_8682\"",
)
# DELETE from quick_setup where id = ANY('{18, 19}'::int[])
res = client.delete(
collection_name="quick_setup",
ids=[18, 19],
)
res = client.delete(
collection_name='quick_setup',
filter='color like "blue%"'
)
Create Index
Concept
Abilities
client.create_vector_index(
collection_name="customized_setup",
field_name="my_vector",
metric_type="IP",
option=IndexOption(...)
)
client.drop_index(
index_name="idx"
)
Search
Single-Vector Search
{
"id": 0,
"distance": 1.4093276262283325,
"entity": {}
},
{
"id": 4,
"distance": 0.9902134537696838,
"entity": {}
},
from sdk import ANNSearchRequest
req = ANNSearchRequest(
data: Vector | SparseVector | ...,
field: str,
metric_type: str,
limit: int | None,
filter: str | None,
range: float | None,
group_by_field: str | None,
outputs: List[str] | None,
distance_alias: str = "distance",
)
# Single-vector search
# SELECT id, emb <-> [1, 1, 1] as distance from t ORDER BY emb <-> [1, 1, 1] LIMIT 5
req = ANNSearchRequest(data=[1, 1, 1], field="emb", metric_type="L2", limit=5)
# Search with extra output fields
# SELECT id, emb <-> [1, 1, 1] as distance, color from t ORDER BY emb <-> [1, 1, 1] LIMIT 5
req = ANNSearchRequest(data=[1, 1, 1], field="emb", metric_type="L2", limit=5, outputs=["color"])
# SELECT id, emb <-> [1, 1, 1] as dis, distance from t ORDER BY emb <-> [1, 1, 1] LIMIT 5
req = ANNSearchRequest(data=[1, 1, 1], field="emb", metric_type="L2", limit=5, outputs=["distance"], distance_alias="dis")
# Filtered search
# SELECT id, emb <-> [1, 1, 1] as distance from t WHERE age > 5 ORDER BY emb <-> [1, 1, 1]
req = ANNSearchRequest(data=[1, 1, 1], field="emb", metric_type="L2", filter="age > 5")
# Range search
# SELECT id, emb <-> [1, 1, 1] as distance from t WHERE emb <<->> sphere([1, 1, 1], 0.2) ORDER BY emb <-> [1, 1, 1] LIMIT 5
req = ANNSearchRequest(data=[1, 1, 1], field="emb", metric_type="L2", range=0.2, limit=5)
# Group search: https://milvus.io/docs/single-vector-search.md#Grouping-search
req = ANNSearchRequest(data=[1, 1, 1], field="emb", metric_type="L2", limit=10, group_by_field="doc_id",
outputs=["doc_id", "passage_id"])
res = client.search(req)
Hybrid search
from sdk import RRFRanker
rerank = RRFRanker()
reqs = [request_1, request_2]
client.hybrid_search(
reqs,
rerank,
limit=2
)
Iterative Search
# Create iterator
iterator = client.search_iterator(req, batch_size=10)
results = []
# Iter until end
while True:
result = iterator.next()
if not result:
iterator.close()
break
results.extend(result)
Manage Partitions
Concept
Abilities
from sdk.partition import Partition, Hash, In, Range
# Hash partition - Random split inserted rows
# CREATE TABLE partitionA PARTITION OF documents FOR VALUES WITH (MODULUS 3, REMAINDER 0);
p = Partition(
partition_name="partitionA",
partition_field="id",
partition_by=Hash(3, 0)
)
# Group partition - Split discrete data based on distribution
# CREATE TABLE partitionA PARTITION OF documents FOR VALUES IN ('A', 'B');
p = Partition(
partition_name="partitionA",
partition_field="alpha",
partition_by=In(('A', 'B'))
)
# Range partition - Split continuous data based on distribution
# CREATE TABLE partitionA PARTITION OF documents FOR VALUES FROM ('2023-03-01') TO ('2023-04-01');
p = Partition(
partition_name="partitionA",
partition_field="day",
partition_by=Range('2023-03-01', '2023-04-01')
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
No description provided.
The text was updated successfully, but these errors were encountered: