You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
{{ message }}
This repository has been archived by the owner on Aug 16, 2023. It is now read-only.
import numpy as np
import pandas as pd
import time
from pymilvus import (
connections,
utility,
FieldSchema,
CollectionSchema,
DataType,
Collection,
)
from bitstring import BitArray
from random import randint
# --- Benchmark configuration ------------------------------------------------
fp_size = 8192  # dimension, in bits, of the binary fingerprint field
milvus_host = "127.0.0.1"
milvus_port = "19530"
collection_name = "emols_fp_1024_test"

# --- Connect and obtain the target collection -------------------------------
print("*** Connect to milvus ***")
connections.connect("default", host=milvus_host, port=milvus_port)

has = utility.has_collection(collection_name)
print(f"Does collection exist in Milvus: {has}")

if has:
    # Reuse the collection that is already on the server.
    ligands_collection = Collection(collection_name)
else:
    # First run: define an auto-id primary key, a SMILES string column and
    # the binary fingerprint column, then create the collection.
    print("*** Create collection ***")
    fields = [
        FieldSchema(
            name="id",
            dtype=DataType.INT64,
            is_primary=True,
            auto_id=True,
        ),
        FieldSchema(
            name="smiles",
            dtype=DataType.VARCHAR,
            max_length=200,
        ),
        FieldSchema(
            name="morgan_fingerprint",
            dtype=DataType.BINARY_VECTOR,
            dim=fp_size,
            is_primary=False,
        ),
    ]
    schema = CollectionSchema(fields, collection_name)
    ligands_collection = Collection(
        collection_name,
        schema,
        consistency_level="Strong",
    )
    print("Collection created")

print(ligands_collection)
# Insert 240 batches of `chunk_size` rows each (24M rows in total).
chunk_size = 100000
smiles = ["test_string"] * chunk_size
for i in range(0, 240):
    print("i", i)
    # Draw fp_size random bits and pack them 8 per byte, so the payload is
    # exactly fp_size bits wide and tracks the schema dimension. The previous
    # code encoded a 1024-character "0"/"1" string as UTF-8, which matched
    # the field size only by coincidence and left every byte as 0x30 or 0x31.
    fp_bytes = BitArray([randint(0, 1) for _ in range(fp_size)]).tobytes()
    # As before, every row of a batch shares the same fingerprint; bytes are
    # immutable, so repeating the reference is safe and avoids re-encoding
    # the value chunk_size times.
    fps_bytes = [fp_bytes] * chunk_size
    # Column order follows the schema minus the auto-generated "id" field.
    ligands_collection.insert([smiles, fps_bytes])
# --- Load the collection, then request the index build ----------------------
print("*** Load collection ***")
ligands_collection.load()

print("*** Create index TANIMOTO BIN_IVF_FLAT ***")
index = dict(
    index_type="BIN_IVF_FLAT",
    metric_type="TANIMOTO",
    params=dict(nlist=1024),
)
ligands_collection.create_index("morgan_fingerprint", index)
print("index finished")

# --- Report the segments returned for this collection -----------------------
info = utility.get_query_segment_info(collection_name=collection_name)
print(info)
print("segments count", len(info))
# Poll the index-building progress every 3 seconds and report how long each
# increase in `indexed_rows` took.
pre = 0
# Start from "now" on a monotonic clock: starting from 0 made the first
# report print the number of seconds since the Unix epoch.
start = time.monotonic()
while True:
    prog = utility.index_building_progress(collection_name=collection_name)
    print(prog)
    indexed_rows = prog['indexed_rows']
    if indexed_rows != pre:
        end = time.monotonic()
        print("index a segment cost", (end - start), "s")
        start = end
        pre = indexed_rows
    # Stop once every row is indexed instead of polling forever. A missing or
    # zero total keeps the loop running, as the original script did.
    total_rows = prog.get('total_rows')
    if total_rows and indexed_rows >= total_rows:
        break
    time.sleep(3)
This script inserts 24M binary vectors (8192 dimensions) into Milvus and builds a BIN_IVF_FLAT index with nlist=1024.
After the insert finishes, 120 segments have been generated, each with about 200,000 rows.
The indexing process is very slow: building the index for one segment takes about 5 minutes. It seems that index building for binary vectors is single-threaded (only one CPU is busy).
The text was updated successfully, but these errors were encountered:
Deploy milvus 2.1.4 and run this script:
This script inserts 24M binary vectors (8192 dimensions) into Milvus and builds a BIN_IVF_FLAT index with nlist=1024.
After the insert finishes, 120 segments have been generated, each with about 200,000 rows.
The indexing process is very slow: building the index for one segment takes about 5 minutes. It seems that index building for binary vectors is single-threaded (only one CPU is busy).
The text was updated successfully, but these errors were encountered: