Skip to content

Commit

Permalink
add index feature manager & static config check
Browse files Browse the repository at this point in the history
Signed-off-by: xianliang.li <[email protected]>
  • Loading branch information
foxspy committed Sep 4, 2024
1 parent 131fe6d commit e0b027e
Show file tree
Hide file tree
Showing 29 changed files with 506 additions and 193 deletions.
16 changes: 16 additions & 0 deletions include/knowhere/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -622,14 +622,22 @@ class Config {

const float defaultRangeFilter = 1.0f / 0.0;

// Statically validates a JSON parameter set for the index identified by `index_type` / `version`.
//
// Per the definition in src/common/config.cc, the function:
//   1. creates the index through IndexFactory with the runtime check disabled; if creation fails, `msg`
//      receives the factory's error text and the factory's error status is returned;
//   2. asks the created index node for its Config object;
//   3. runs Config::FormatAndCheck on `json`, returning early on any non-success status;
//   4. returns the result of Config::Load for the given `param_type`.
//
// DataType   - vector element type; explicit instantiations exist for bin1, fp32, bf16 and fp16.
// index_type - registered index name passed to the factory.
// version    - index version passed to the factory.
// json       - parameters to validate; taken by non-const reference
//              (NOTE(review): presumably may be normalized in place by FormatAndCheck - confirm).
// param_type - which parameter stage the config is loaded for.
// msg        - out-parameter receiving a description of the failure, if any.
template <typename DataType>
knowhere::Status
CheckConfig(const std::string& index_type, const int32_t& version, knowhere::Json& json,
knowhere::PARAM_TYPE param_type, std::string& msg);

class BaseConfig : public Config {
public:
CFG_INT dim; // just used for config verify
CFG_STRING metric_type;
CFG_INT k;
CFG_INT num_build_thread;
CFG_BOOL retrieve_friendly;
CFG_STRING data_path;
CFG_STRING index_prefix;

CFG_FLOAT vec_field_size_gb;
// for distance metrics, we search for vectors with distance in [range_filter, radius).
// for similarity metrics, we search for vectors with similarity in (radius, range_filter].
CFG_FLOAT radius;
Expand Down Expand Up @@ -659,6 +667,10 @@ class BaseConfig : public Config {
CFG_FLOAT bm25_b;
CFG_FLOAT bm25_avgdl;
KNOHWERE_DECLARE_CONFIG(BaseConfig) {
KNOWHERE_CONFIG_DECLARE_FIELD(dim)
.allow_empty_without_default()
.description("vector dim")
.for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(metric_type)
.set_default("L2")
.description("metric type")
Expand All @@ -679,6 +691,10 @@ class BaseConfig : public Config {
.allow_empty_without_default()
.for_train()
.for_deserialize();
KNOWHERE_CONFIG_DECLARE_FIELD(vec_field_size_gb)
.description("vector filed size in GB.")
.set_default(0)
.for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(k)
.set_default(10)
.description("search for top k similar vector.")
Expand Down
52 changes: 52 additions & 0 deletions include/knowhere/feature.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright (C) 2019-2023 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

#ifndef FEATURE_H
#define FEATURE_H

// uint64_t is used below; include its header so this file is self-contained.
#include <cstdint>

// Index feature flags, combined into a single 64-bit mask.
//   bits 0-4   : vector data types an index supports
//   bits 16-21 : capabilities / deployment properties of an index
//
// These feature codes have been reported to external consumers (milvus); please keep them in sync whenever a
// value needs to be changed.
//
// Every flag is built from a 64-bit one (uint64_t{1}) rather than `1UL`, because `unsigned long` is only 32 bits
// wide on some platforms and a future flag at bit 32 or above would otherwise be computed incorrectly.
namespace knowhere::feature {
// vector datatype support : binary
inline constexpr uint64_t BINARY = uint64_t{1} << 0;
// vector datatype support : float32
inline constexpr uint64_t FLOAT32 = uint64_t{1} << 1;
// vector datatype support : fp16
inline constexpr uint64_t FP16 = uint64_t{1} << 2;
// vector datatype support : bf16
inline constexpr uint64_t BF16 = uint64_t{1} << 3;
// vector datatype support : sparse_float32
inline constexpr uint64_t SPARSE_FLOAT32 = uint64_t{1} << 4;

// This flag indicates that there is no need to create any index structure (the build stage can be skipped)
inline constexpr uint64_t BF = uint64_t{1} << 16;
// This flag indicates that the index defaults to KNN search, meaning the recall rate is 100%
inline constexpr uint64_t KNN = uint64_t{1} << 17;
// This flag indicates that the index is deployed on GPU (needs GPU devices)
inline constexpr uint64_t GPU = uint64_t{1} << 18;
// This flag indicates that the index supports using mmap to manage its main memory, which can significantly
// improve the capacity
inline constexpr uint64_t MMAP = uint64_t{1} << 19;
// This flag indicates that the index supports using a materialized view to accelerate filtering search
inline constexpr uint64_t MV = uint64_t{1} << 20;
// This flag indicates that the index needs disk during search
inline constexpr uint64_t DISK = uint64_t{1} << 21;

// Convenience masks over the data-type bits.
inline constexpr uint64_t ALL_TYPE = BINARY | FLOAT32 | FP16 | BF16 | SPARSE_FLOAT32;
inline constexpr uint64_t ALL_DENSE_TYPE = BINARY | FLOAT32 | FP16 | BF16;
inline constexpr uint64_t ALL_DENSE_FLOAT_TYPE = FLOAT32 | FP16 | BF16;

// Convenience masks for float32 GPU indexes, with and without the KNN flag.
inline constexpr uint64_t GPU_KNN_FLOAT_INDEX = FLOAT32 | GPU | KNN;
inline constexpr uint64_t GPU_ANN_FLOAT_INDEX = FLOAT32 | GPU;
}  // namespace knowhere::feature
#endif /* FEATURE_H */
4 changes: 3 additions & 1 deletion include/knowhere/index/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,10 @@ class Index {
if (node == nullptr)
return;
node->DecRef();
if (!node->Ref())
if (!node->Ref()) {
delete node;
node = nullptr;
}
}

private:
Expand Down
79 changes: 68 additions & 11 deletions include/knowhere/index/index_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,18 @@ class IndexFactory {
public:
template <typename DataType>
expected<Index<IndexNode>>
Create(const std::string& name, const int32_t& version, const Object& object = nullptr);
Create(const std::string& name, const int32_t& version, const Object& object = nullptr, bool runtimeCheck = true);
template <typename DataType>
const IndexFactory&
Register(const std::string& name, std::function<Index<IndexNode>(const int32_t&, const Object&)> func);
Register(const std::string& name, std::function<Index<IndexNode>(const int32_t&, const Object&)> func,
const uint64_t features);
static IndexFactory&
Instance();
typedef std::tuple<std::set<std::pair<std::string, VecType>>, std::set<std::string>> GlobalIndexTable;
bool
FeatureCheck(const std::string& name, uint64_t feature) const;
static const std::map<std::string, uint64_t>&
GetIndexFeatures();
static GlobalIndexTable&
StaticIndexTableInstance();

Expand All @@ -47,36 +52,88 @@ class IndexFactory {
std::function<T1(const int32_t&, const Object&)> fun_value;
};
typedef std::map<std::string, std::unique_ptr<FunMapValueBase>> FuncMap;
typedef std::map<std::string, uint64_t> FeatureMap;
IndexFactory();
static FuncMap&
MapInstance();
static FeatureMap&
FeatureMapInstance();
};

#define KNOWHERE_CONCAT(x, y) index_factory_ref_##x##y
#define KNOWHERE_REGISTER_GLOBAL(name, func, data_type) \
const IndexFactory& KNOWHERE_CONCAT(name, data_type) = IndexFactory::Instance().Register<data_type>(#name, func)
#define KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, data_type, ...) \
#define KNOWHERE_REGISTER_GLOBAL(name, func, data_type, condition, features) \
const IndexFactory& KNOWHERE_CONCAT(name, data_type) = \
condition ? IndexFactory::Instance().Register<data_type>(#name, func, features) : IndexFactory::Instance();

#define KNOWHERE_REGISTER_FUNC_GLOBAL(name, func, data_type, features) \
KNOWHERE_REGISTER_GLOBAL(name, func, data_type, typeCheck<data_type>(features), features)

#define KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, data_type, features, ...) \
KNOWHERE_REGISTER_GLOBAL( \
name, \
(static_cast<Index<index_node<data_type, ##__VA_ARGS__>> (*)(const int32_t&, const Object&)>( \
&Index<index_node<data_type, ##__VA_ARGS__>>::Create)), \
data_type)
#define KNOWHERE_MOCK_REGISTER_GLOBAL(name, index_node, data_type, ...) \
data_type, typeCheck<data_type>(features), features)

#define KNOWHERE_MOCK_REGISTER_GLOBAL(name, index_node, data_type, features, ...) \
KNOWHERE_REGISTER_GLOBAL( \
name, \
[](const int32_t& version, const Object& object) { \
return (Index<IndexNodeDataMockWrapper<data_type>>::Create( \
std::make_unique<index_node<MockData<data_type>::type, ##__VA_ARGS__>>(version, object))); \
}, \
data_type)
#define KNOWHERE_REGISTER_GLOBAL_WITH_THREAD_POOL(name, index_node, data_type, thread_size) \
KNOWHERE_REGISTER_GLOBAL( \
data_type, typeCheck<data_type>(features), features)

#define KNOWHERE_SIMPLE_REGISTER_ALL_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bin1, (features | knowhere::feature::ALL_TYPE), ##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bf16, (features | knowhere::feature::ALL_TYPE), ##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp16, (features | knowhere::feature::ALL_TYPE), ##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::ALL_TYPE), ##__VA_ARGS__);

#define KNOWHERE_SIMPLE_REGISTER_SPARSE_FLOAT_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::SPARSE_FLOAT32), \
##__VA_ARGS__);

#define KNOWHERE_SIMPLE_REGISTER_DENSE_ALL_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bin1, (features | knowhere::feature::ALL_DENSE_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bf16, (features | knowhere::feature::ALL_DENSE_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp16, (features | knowhere::feature::ALL_DENSE_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::ALL_DENSE_TYPE), \
##__VA_ARGS__);

#define KNOWHERE_SIMPLE_REGISTER_DENSE_BIN_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bin1, (features | knowhere::feature::BINARY), ##__VA_ARGS__);

#define KNOWHERE_SIMPLE_REGISTER_DENSE_FLOAT32_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::FLOAT32), ##__VA_ARGS__);

#define KNOWHERE_SIMPLE_REGISTER_DENSE_FLOAT_ALL_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bf16, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp16, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__);

#define KNOWHERE_MOCK_REGISTER_DENSE_FLOAT_ALL_GLOBAL(name, index_node, features, ...) \
KNOWHERE_MOCK_REGISTER_GLOBAL(name, index_node, bf16, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__); \
KNOWHERE_MOCK_REGISTER_GLOBAL(name, index_node, fp16, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__);

#define KNOWHERE_REGISTER_GLOBAL_WITH_THREAD_POOL(name, index_node, data_type, features, thread_size) \
KNOWHERE_REGISTER_FUNC_GLOBAL( \
name, \
[](const int32_t& version, const Object& object) { \
return (Index<IndexNodeThreadPoolWrapper>::Create( \
std::make_unique<index_node<MockData<data_type>::type>>(version, object), thread_size)); \
}, \
data_type)
data_type, features)
#define KNOWHERE_SET_STATIC_GLOBAL_INDEX_TABLE(table_index, name, index_table) \
static int name = []() -> int { \
auto& static_index_table = std::get<table_index>(IndexFactory::StaticIndexTableInstance()); \
Expand Down
5 changes: 5 additions & 0 deletions include/knowhere/index/index_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,11 @@ class IndexNode : public Object {
virtual bool
HasRawData(const std::string& metric_type) const = 0;

// Index-specific configuration validation hook.
// This default implementation performs no checks: it ignores `cfg` and `paramType`, leaves `msg`
// untouched, and always reports success. Being virtual, it can be overridden by derived index nodes.
virtual Status
ConfigCheck(const Config& cfg, PARAM_TYPE paramType, std::string& msg) const {
    return Status::success;
}

virtual bool
IsAdditionalScalarSupported() const {
return false;
Expand Down
21 changes: 21 additions & 0 deletions include/knowhere/operands.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include <cstdint>
#include <cstring>

#include "feature.h"

namespace {
union fp32_bits {
uint32_t as_bits;
Expand Down Expand Up @@ -143,6 +145,25 @@ struct bf16 {
}
};

// Reports whether the feature mask `features` advertises support for the vector element type T.
//   bin1 -> BINARY bit
//   fp16 -> FP16 bit
//   bf16 -> BF16 bit
//   fp32 -> FLOAT32 bit or SPARSE_FLOAT32 bit
// Any other T yields false.
template <typename T>
bool
typeCheck(uint64_t features) {
    if constexpr (std::is_same_v<T, bin1>) {
        return (features & knowhere::feature::BINARY) != 0;
    } else if constexpr (std::is_same_v<T, fp16>) {
        return (features & knowhere::feature::FP16) != 0;
    } else if constexpr (std::is_same_v<T, bf16>) {
        return (features & knowhere::feature::BF16) != 0;
    } else if constexpr (std::is_same_v<T, fp32>) {
        // TODO : add sparse_fp32 data type
        // Until then, fp32 stands in for both the dense and the sparse float32 feature bits.
        const bool dense = (features & knowhere::feature::FLOAT32) != 0;
        const bool sparse = (features & knowhere::feature::SPARSE_FLOAT32) != 0;
        return dense || sparse;
    } else {
        return false;
    }
}

template <typename InType, typename... Types>
using TypeMatch = std::bool_constant<(... | std::is_same_v<InType, Types>)>;
template <typename InType>
Expand Down
4 changes: 4 additions & 0 deletions include/knowhere/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <strings.h>

#include <algorithm>
#include <map>
#include <vector>

#include "knowhere/binaryset.h"
Expand Down Expand Up @@ -186,6 +187,9 @@ ConvertIVFFlat(const BinarySet& binset, const MetricType metric_type, const uint
bool
UseDiskLoad(const std::string& index_type, const int32_t& /*version*/);

// Declaration only; the definition is not part of this header.
// NOTE(review): presumably validates the string key/value parameters in `config` for the index named
// `index_type` and returns true when they are acceptable - confirm against the implementation.
bool
ParamCheck(const std::string& index_type, const std::map<std::string, std::string>& config);

template <typename T, typename W>
static void
writeBinaryPOD(W& out, const T& podRef) {
Expand Down
75 changes: 28 additions & 47 deletions src/common/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "index/hnsw/hnsw_config.h"
#include "index/ivf/ivf_config.h"
#include "index/sparse/sparse_inverted_index_config.h"
#include "knowhere/index/index_factory.h"
#include "knowhere/log.h"

namespace knowhere {
Expand Down Expand Up @@ -119,54 +120,34 @@ Config::FormatAndCheck(const Config& cfg, Json& json, std::string* const err_msg
return Status::success;
}

} // namespace knowhere

extern "C" __attribute__((visibility("default"))) int
CheckConfig(int index_type, char const* str, int n, int param_type);

int
CheckConfig(int index_type, const char* str, int n, int param_type) {
if (!str || n <= 0) {
return int(knowhere::Status::invalid_args);
}
knowhere::Json json = knowhere::Json::parse(str, str + n);
std::unique_ptr<knowhere::Config> cfg;

switch (index_type) {
case 0:
cfg = std::make_unique<knowhere::FlatConfig>();
break;
case 1:
cfg = std::make_unique<knowhere::DiskANNConfig>();
break;
case 2:
cfg = std::make_unique<knowhere::HnswConfig>();
break;
case 3:
cfg = std::make_unique<knowhere::IvfFlatConfig>();
break;
case 4:
cfg = std::make_unique<knowhere::IvfPqConfig>();
break;
case 5:
cfg = std::make_unique<knowhere::GpuRaftCagraConfig>();
break;
case 6:
cfg = std::make_unique<knowhere::GpuRaftIvfPqConfig>();
break;
case 7:
cfg = std::make_unique<knowhere::GpuRaftIvfFlatConfig>();
break;
case 8:
cfg = std::make_unique<knowhere::GpuRaftBruteForceConfig>();
break;
default:
return int(knowhere::Status::invalid_args);
template <typename DataType>
knowhere::Status
CheckConfig(const std::string& index_type, const int32_t& version, knowhere::Json& json,
knowhere::PARAM_TYPE param_type, std::string& msg) {
auto index = knowhere::IndexFactory::Instance().Create<DataType>(index_type, version, nullptr, false);
if (!index.has_value()) {
msg = index.what();
return index.error();
}

auto res = knowhere::Config::FormatAndCheck(*cfg, json, nullptr);
auto cfg = index.value().Node()->CreateConfig();
auto res = knowhere::Config::FormatAndCheck(*cfg, json, &msg);
if (res != knowhere::Status::success) {
return int(res);
return res;
}
return int(knowhere::Config::Load(*cfg, json, knowhere::PARAM_TYPE(param_type), nullptr));
return knowhere::Config::Load(*cfg, json, knowhere::PARAM_TYPE(param_type), &msg);
}

template knowhere::Status
CheckConfig<knowhere::bin1>(const std::string& index_type, const int32_t& version, knowhere::Json& json,
knowhere::PARAM_TYPE param_type, std::string& msg);
template knowhere::Status
CheckConfig<knowhere::fp32>(const std::string& index_type, const int32_t& version, knowhere::Json& json,
knowhere::PARAM_TYPE param_type, std::string& msg);
template knowhere::Status
CheckConfig<knowhere::bf16>(const std::string& index_type, const int32_t& version, knowhere::Json& json,
knowhere::PARAM_TYPE param_type, std::string& msg);
template knowhere::Status
CheckConfig<knowhere::fp16>(const std::string& index_type, const int32_t& version, knowhere::Json& json,
knowhere::PARAM_TYPE param_type, std::string& msg);

} // namespace knowhere
Loading

0 comments on commit e0b027e

Please sign in to comment.