Skip to content

Commit

Permalink
[experimental] use centroids generated by faiss
Browse files Browse the repository at this point in the history
Signed-off-by: usamoi <[email protected]>
  • Loading branch information
usamoi committed Aug 28, 2024
1 parent c29308c commit 60ac98b
Showing 1 changed file with 34 additions and 4 deletions.
38 changes: 34 additions & 4 deletions crates/rabitq/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use common::json::Json;
use common::mmap_array::MmapArray;
use common::remap::RemappedCollection;
use common::vec2::Vec2;
use k_means::{k_means, k_means_lookup, k_means_lookup_many};
use k_means::{k_means_lookup, k_means_lookup_many};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::fs::create_dir;
use std::path::Path;
Expand Down Expand Up @@ -108,7 +108,7 @@ fn from_nothing<O: Op>(
create_dir(path.as_ref()).unwrap();
let RabitqIndexingOptions {
nlist,
spherical_centroids,
spherical_centroids: _,
} = options.indexing.clone().unwrap_rabitq();
let projection = {
use nalgebra::{DMatrix, QR};
Expand All @@ -130,9 +130,39 @@ fn from_nothing<O: Op>(
}
projection
};
let samples = O::sample(collection);
rayon::check();
let centroids: Vec2<f32> = k_means(nlist as usize, samples, spherical_centroids);
// let centroids: Vec2<f32> = k_means(nlist as usize, samples, spherical_centroids);
let centroids: Vec2<f32> = {
/// Reads every vector stored in the file at `path`.
///
/// The file is parsed as a sequence of records. Each record is a 4-byte
/// little-endian `u32` element count followed by that many 4-byte
/// little-endian `f32` values. Parsing stops at a clean end of file, i.e. when
/// no bytes remain where the next record header would start.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or read, and
/// an [`std::io::ErrorKind::UnexpectedEof`] error if the file ends in the
/// middle of a record header or in the middle of a vector's components.
fn read_vecs(path: impl AsRef<Path>) -> std::io::Result<Vec<Vec<f32>>> {
    use std::io::{ErrorKind, Read};

    let file = std::fs::File::open(path)?;
    let mut reader = std::io::BufReader::new(file);
    let mut buf = [0u8; 4];
    let mut vecs = Vec::new();
    loop {
        // `Read::read` is allowed to return fewer bytes than requested, so keep
        // reading until the header is complete. Only a read of zero bytes at the
        // very start of a header counts as a clean end of file.
        let mut filled = 0;
        while filled < buf.len() {
            match reader.read(&mut buf[filled..]) {
                Ok(0) => break,
                Ok(n) => filled += n,
                Err(e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        if filled == 0 {
            break;
        }
        if filled < buf.len() {
            // A partial header would otherwise be decoded together with stale
            // bytes left in `buf` from the previous component.
            return Err(std::io::Error::new(
                ErrorKind::UnexpectedEof,
                "truncated vector header",
            ));
        }
        let dim = u32::from_le_bytes(buf) as usize;
        let mut vec = Vec::with_capacity(dim);
        for _ in 0..dim {
            reader.read_exact(&mut buf)?;
            vec.push(f32::from_le_bytes(buf));
        }
        vecs.push(vec);
    }
    Ok(vecs)
}
/// Loads a centroid matrix from the vector file at `path`.
///
/// Every vector returned by `read_vecs` becomes one row. The row count is the
/// number of vectors in the file and the column count is the length of the
/// first vector.
///
/// # Panics
///
/// Panics if the file cannot be read, if it contains no vectors, or if the
/// vectors do not all have the same length.
fn load_centroids_from_fvecs(path: impl AsRef<Path>) -> Vec2<f32> {
    let fvecs = read_vecs(&path).expect("read centroids error");
    let nlist = fvecs.len();
    // Avoid the opaque index-out-of-bounds panic that `fvecs[0]` would give on
    // an empty file.
    let dims = fvecs
        .first()
        .expect("centroids file contains no vectors")
        .len();
    // The flattened buffer only describes an `nlist x dims` matrix when every
    // row has exactly `dims` components, so reject ragged input up front.
    assert!(
        fvecs.iter().all(|v| v.len() == dims),
        "centroids file contains vectors of differing dimensions"
    );
    Vec2::from_vec((nlist, dims), fvecs.into_iter().flatten().collect())
}
load_centroids_from_fvecs("/usamoi/repos/RaBitQ/gist/gist_centroid_4096.fvecs")
};
rayon::check();
let ls = (0..collection.len())
.into_par_iter()
Expand Down

0 comments on commit 60ac98b

Please sign in to comment.