Skip to content

Commit

Permalink
Optimize IVFFLAT without codes
Browse files Browse the repository at this point in the history
Signed-off-by: Yudong Cai <[email protected]>
  • Loading branch information
cydrain authored and liliu-z committed Aug 22, 2023
1 parent da457e9 commit 4176a66
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 69 deletions.
37 changes: 2 additions & 35 deletions src/index/ivf/ivf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -349,24 +349,6 @@ IvfIndexNode<T>::Add(const DataSet& dataset, const Config& cfg) {
try {
if constexpr (std::is_same<T, faiss::IndexIVFFlat>::value) {
index_->add_without_codes(rows, (const float*)data);
auto raw_data = dataset.GetTensor();
auto invlists = index_->invlists;
auto d = index_->d;
size_t nb = dataset.GetRows();
index_->prefix_sum.resize(invlists->nlist);
size_t curr_index = 0;

auto ails = dynamic_cast<faiss::ArrayInvertedLists*>(invlists);
index_->arranged_codes.resize(d * nb * sizeof(float));
for (size_t i = 0; i < invlists->nlist; i++) {
auto list_size = ails->ids[i].size();
for (size_t j = 0; j < list_size; j++) {
memcpy(index_->arranged_codes.data() + d * (curr_index + j) * sizeof(float),
(uint8_t*)raw_data + d * ails->ids[i][j] * sizeof(float), d * sizeof(float));
}
index_->prefix_sum[i] = curr_index;
curr_index += list_size;
}
} else if constexpr (std::is_same<faiss::IndexBinaryIVF, T>::value) {
index_->add(rows, (const uint8_t*)data);
} else {
Expand Down Expand Up @@ -780,23 +762,8 @@ IvfIndexNode<faiss::IndexIVFFlat>::Deserialize(const BinarySet& binset, const Co
LOG_KNOWHERE_ERROR_ << "Invalid binary set.";
return Status::invalid_binary_set;
}
auto invlists = index_->invlists;
auto d = index_->d;
size_t nb = binary->size / invlists->code_size;
index_->prefix_sum.resize(invlists->nlist);
size_t curr_index = 0;

auto ails = dynamic_cast<faiss::ArrayInvertedLists*>(invlists);
index_->arranged_codes.resize(d * nb * sizeof(float));
for (size_t i = 0; i < invlists->nlist; i++) {
auto list_size = ails->ids[i].size();
for (size_t j = 0; j < list_size; j++) {
memcpy(index_->arranged_codes.data() + d * (curr_index + j) * sizeof(float),
binary->data.get() + d * ails->ids[i][j] * sizeof(float), d * sizeof(float));
}
index_->prefix_sum[i] = curr_index;
curr_index += list_size;
}
size_t nb = binary->size / index_->invlists->code_size;
index_->arrange_codes(nb, (const float*)(binary->data.get()));
} catch (const std::exception& e) {
LOG_KNOWHERE_WARNING_ << "faiss inner error: " << e.what();
return Status::faiss_inner_error;
Expand Down
22 changes: 11 additions & 11 deletions thirdparty/faiss/faiss/IndexIVF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,7 @@ void IndexIVF::add_with_ids_without_codes(
idx_t n,
const float* x,
const idx_t* xids) {
// will be overridden
FAISS_THROW_MSG(
"add_with_ids_without_codes not implemented for this type of index");
FAISS_THROW_MSG("add_with_ids_without_codes not implemented");
}

void IndexIVF::add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids) {
Expand Down Expand Up @@ -317,6 +315,14 @@ void IndexIVF::add_core(
ntotal += n;
}

/// Base-class placeholder for adding ids without storing codes.
///
/// IndexIVF itself does not support this operation: the call always throws
/// through FAISS_THROW_MSG. The method is declared virtual, and IndexIVFFlat
/// supplies an override that does the real work.
///
/// All four arguments are ignored here, so they are left unnamed:
/// vector count, input vectors, optional ids, precomputed coarse list numbers.
void IndexIVF::add_core_without_codes(
        idx_t /*n*/,
        const float* /*x*/,
        const idx_t* /*xids*/,
        const idx_t* /*coarse_idx*/) {
    FAISS_THROW_MSG("add_core_without_codes not implemented");
}

void IndexIVF::to_readonly() {
if (is_readonly())
return;
Expand All @@ -327,12 +333,7 @@ void IndexIVF::to_readonly() {
}

void IndexIVF::to_readonly_without_codes() {
if (is_readonly())
return;
auto readonly_lists = this->invlists->to_readonly_without_codes();
if (!readonly_lists)
return;
this->replace_invlists(readonly_lists, true);
FAISS_THROW_MSG("to_readonly_without_codes not implemented");
}

bool IndexIVF::is_readonly() const {
Expand Down Expand Up @@ -909,8 +910,7 @@ void IndexIVF::reconstruct(idx_t key, float* recons) const {
}

void IndexIVF::reconstruct_without_codes(idx_t key, float* recons) const {
idx_t lo = direct_map.get(key);
reconstruct_from_offset_without_codes(lo_listno(lo), lo_offset(lo), recons);
FAISS_THROW_MSG("reconstruct_without_codes not implemented");
}

void IndexIVF::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
Expand Down
10 changes: 6 additions & 4 deletions thirdparty/faiss/faiss/IndexIVF.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,10 @@ struct IndexIVF : Index, Level1Quantizer {

/// Calls add_with_ids with NULL ids
void add(idx_t n, const float* x) override;

/// Calls add_with_ids_without_codes
void add_without_codes(idx_t n, const float* x) override;

/// default implementation that calls encode_vectors
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;

/// Implementation for adding without original vector data
void add_with_ids_without_codes(idx_t n, const float* x, const idx_t* xids)
override;

Expand All @@ -182,6 +178,12 @@ struct IndexIVF : Index, Level1Quantizer {
const idx_t* xids,
const idx_t* precomputed_idx);

virtual void add_core_without_codes(
idx_t n,
const float* x,
const idx_t* xids,
const idx_t* precomputed_idx);

/** Encodes a set of vectors as they would appear in the inverted lists
*
* @param list_nos inverted list ids as returned by the
Expand Down
73 changes: 58 additions & 15 deletions thirdparty/faiss/faiss/IndexIVFFlat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include <omp.h>

#include <algorithm>
#include <cinttypes>
#include <cstdio>

Expand Down Expand Up @@ -39,6 +40,33 @@ IndexIVFFlat::IndexIVFFlat(
code_size = sizeof(float) * d;
}

/// Rebuild `arranged_codes` so that the raw vectors are laid out contiguously,
/// one inverted list after another, and record where each list starts.
///
/// On return `prefix_sum` has nlist + 1 entries: prefix_sum[i] is the number
/// of vectors placed before list i, and prefix_sum[nlist] is the total number
/// of vectors copied.
///
/// @param n  number of vectors available in x
/// @param x  source vectors, n * d floats; every id stored in the inverted
///           lists is used as a row index into this array
///
/// Throws (through FAISS_THROW_IF_NOT) when the inverted lists are not
/// ArrayInvertedLists, when a stored id falls outside [0, n), or when the
/// lists hold more than n ids in total. Without these checks the copy loop
/// would read past x and write past the end of arranged_codes.
void IndexIVFFlat::arrange_codes(idx_t n, const float* x) {
    FAISS_THROW_IF_NOT(n >= 0);

    // dynamic_cast yields nullptr for any other InvertedLists implementation;
    // fail loudly instead of dereferencing a null pointer below.
    auto ails = dynamic_cast<faiss::ArrayInvertedLists*>(invlists);
    FAISS_THROW_IF_NOT(ails != nullptr);

    const size_t nlist = invlists->nlist;
    const size_t capacity = static_cast<size_t>(n);

    prefix_sum.resize(nlist + 1);
    prefix_sum[0] = 0;
    arranged_codes.resize(d * n * sizeof(float));
    auto dst = reinterpret_cast<float*>(arranged_codes.data());

    for (size_t i = 0; i < nlist; i++) {
        const auto& list_ids = ails->ids[i];
        const size_t list_size = list_ids.size();

        // arranged_codes only has room for n vectors in total.
        FAISS_THROW_IF_NOT(prefix_sum[i] + list_size <= capacity);

        for (size_t j = 0; j < list_size; j++) {
            const idx_t id = list_ids[j];
            // The id doubles as the row number in x, so it must be in range.
            FAISS_THROW_IF_NOT(id >= 0 && id < n);
            std::copy_n(x + d * id, d, dst);
            dst += d;
        }
        prefix_sum[i + 1] = prefix_sum[i] + list_size;
    }
}

/// Add n vectors by id only, then rebuild the list-ordered copy of the data.
///
/// Steps: ask the coarse quantizer for the list number of every vector,
/// register the ids through add_core_without_codes, and finally call
/// arrange_codes on this same batch.
///
/// @param n     number of vectors in x
/// @param x     input vectors
/// @param xids  optional ids, forwarded unchanged to add_core_without_codes
///
/// NOTE(review): arrange_codes uses the ids stored in the inverted lists as
/// row indices into x, so this presumably expects a single add with ids
/// 0..n-1 -- confirm before relying on incremental adds or custom xids.
void IndexIVFFlat::add_with_ids_without_codes(
        idx_t n,
        const float* x,
        const idx_t* xids) {
    // Scratch buffer for the coarse assignment; released on scope exit.
    std::unique_ptr<idx_t[]> assignments{new idx_t[n]};
    idx_t* const list_nos = assignments.get();

    quantizer->assign(n, x, list_nos);
    add_core_without_codes(n, x, xids, list_nos);
    arrange_codes(n, x);
}

void IndexIVFFlat::add_core(
idx_t n,
const float* x,
Expand Down Expand Up @@ -86,38 +114,39 @@ void IndexIVFFlat::add_core(
ntotal += n;
}

// Add ids only, vectors not added to Index.
void IndexIVFFlat::add_with_ids_without_codes(
void IndexIVFFlat::add_core_without_codes(
idx_t n,
const float* x,
const idx_t* xids) {
const idx_t* xids,
const idx_t* coarse_idx) {
FAISS_THROW_IF_NOT(is_trained);
FAISS_THROW_IF_NOT(coarse_idx);
assert(invlists);
direct_map.check_can_add(xids);
const int64_t* idx;
ScopeDeleter<int64_t> del;

int64_t* idx0 = new int64_t[n];
del.set(idx0);
quantizer->assign(n, x, idx0);
idx = idx0;

int64_t n_add = 0;
DirectMapAdd dm_adder(direct_map, n, xids);

for (size_t i = 0; i < n; i++) {
idx_t id = xids ? xids[i] : ntotal + i;
idx_t list_no = idx[i];
size_t offset;
idx_t list_no = coarse_idx[i];

if (list_no >= 0) {
const float* xi = x + i * d;
offset = invlists->add_entry_without_codes(list_no, id);
size_t offset = invlists->add_entry_without_codes(list_no, id);
dm_adder.add(i, list_no, offset);
n_add++;
} else {
offset = 0;
dm_adder.add(i, -1, 0);
}
direct_map.add_single_id(id, list_no, offset);
}

if (verbose) {
printf("IndexIVFFlat::add_core_without_codes: added %" PRId64 " / %" PRId64
" vectors\n",
n_add,
n);
}
ntotal += n;
}

Expand Down Expand Up @@ -254,6 +283,20 @@ InvertedListScanner* IndexIVFFlat::get_InvertedListScanner(
return nullptr;
}

void IndexIVFFlat::to_readonly_without_codes() {
if (is_readonly())
return;
auto readonly_lists = this->invlists->to_readonly_without_codes();
if (!readonly_lists)
return;
this->replace_invlists(readonly_lists, true);
}

/// Reconstruct the vector stored under `key` into `recons`.
///
/// The direct map is queried for the packed location of `key`; that value is
/// split with lo_listno / lo_offset and handed to
/// reconstruct_from_offset_without_codes.
///
/// @param key     id to look up in direct_map
/// @param recons  output buffer passed through to
///                reconstruct_from_offset_without_codes
void IndexIVFFlat::reconstruct_without_codes(idx_t key, float* recons) const {
    const idx_t packed = direct_map.get(key);
    const auto list_no = lo_listno(packed);
    const auto offset = lo_offset(packed);
    reconstruct_from_offset_without_codes(list_no, offset, recons);
}

void IndexIVFFlat::reconstruct_from_offset(
int64_t list_no,
int64_t offset,
Expand Down
19 changes: 16 additions & 3 deletions thirdparty/faiss/faiss/IndexIVFFlat.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,25 @@ struct IndexIVFFlat : IndexIVF {
size_t nlist_,
MetricType = METRIC_L2);

void arrange_codes(idx_t n, const float* x);

void add_with_ids_without_codes(
idx_t n,
const float* x,
const idx_t* xids) override;

void add_core(
idx_t n,
const float* x,
const float* x_norms,
const idx_t* xids,
const idx_t* precomputed_idx) override;

/// implemented for all IndexIVF* classes
void add_with_ids_without_codes(idx_t n, const float* x, const idx_t* xids)
override;
void add_core_without_codes(
idx_t n,
const float* x,
const idx_t* xids,
const idx_t* precomputed_idx) override;

void encode_vectors(
idx_t n,
Expand All @@ -49,6 +58,10 @@ struct IndexIVFFlat : IndexIVF {
InvertedListScanner* get_InvertedListScanner(
bool store_pairs) const override;

void to_readonly_without_codes() override;

void reconstruct_without_codes(idx_t key, float* recons) const override;

void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
const override;

Expand Down
2 changes: 1 addition & 1 deletion thirdparty/faiss/faiss/invlists/InvertedLists.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ size_t InvertedLists::add_entry(
size_t InvertedLists::add_entry_without_codes(
size_t list_no,
idx_t theid) {
return add_entries_without_codes (list_no, 1, &theid);
return add_entries_without_codes(list_no, 1, &theid);
}

size_t InvertedLists::add_entries_without_codes(
Expand Down

0 comments on commit 4176a66

Please sign in to comment.