Skip to content

Commit

Permalink
add index feature manager & static config check
Browse files Browse the repository at this point in the history
Signed-off-by: xianliang.li <[email protected]>
  • Loading branch information
foxspy committed Oct 12, 2024
1 parent b201140 commit 0f281f7
Show file tree
Hide file tree
Showing 32 changed files with 569 additions and 203 deletions.
8 changes: 8 additions & 0 deletions include/knowhere/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -501,12 +501,15 @@ const float defaultRangeFilter = 1.0f / 0.0;

class BaseConfig : public Config {
public:
CFG_INT dim; // just used for config verify
CFG_STRING metric_type;
CFG_INT k;
CFG_INT num_build_thread;
CFG_BOOL retrieve_friendly;
CFG_STRING data_path;
CFG_STRING index_prefix;

CFG_FLOAT vec_field_size_gb;
// for distance metrics, we search for vectors with distance in [range_filter, radius).
// for similarity metrics, we search for vectors with similarity in (radius, range_filter].
CFG_FLOAT radius;
Expand Down Expand Up @@ -536,6 +539,7 @@ class BaseConfig : public Config {
CFG_FLOAT bm25_b;
CFG_FLOAT bm25_avgdl;
KNOHWERE_DECLARE_CONFIG(BaseConfig) {
KNOWHERE_CONFIG_DECLARE_FIELD(dim).allow_empty_without_default().description("vector dim").for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(metric_type)
.set_default("L2")
.description("metric type")
Expand All @@ -558,6 +562,10 @@ class BaseConfig : public Config {
.allow_empty_without_default()
.for_train()
.for_deserialize();
KNOWHERE_CONFIG_DECLARE_FIELD(vec_field_size_gb)
.description("vector filed size in GB.")
.set_default(0)
.for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(k)
.set_default(10)
.description("search for top k similar vector.")
Expand Down
53 changes: 53 additions & 0 deletions include/knowhere/feature.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright (C) 2019-2023 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

#ifndef FEATURE_H
#define FEATURE_H

// uint64_t is used below; include it here so this header is self-contained.
#include <cstdint>

// Bit flags describing what an index supports. Bits 0-4 identify the supported vector data types,
// bits 16-21 identify index capabilities; the remaining bits are currently unused.
//
// These feature codes have been reported to external consumers (milvus); please keep them in sync
// whenever a value needs to change.
//
// Every flag is built from a 64-bit literal so that the shift is performed in 64 bits on every
// platform (unsigned long is only 32 bits wide on LLP64 targets).
namespace knowhere::feature {
// vector datatype support : binary
inline constexpr uint64_t BINARY = uint64_t{1} << 0;
// vector datatype support : float32
inline constexpr uint64_t FLOAT32 = uint64_t{1} << 1;
// vector datatype support : fp16
inline constexpr uint64_t FP16 = uint64_t{1} << 2;
// vector datatype support : bf16
inline constexpr uint64_t BF16 = uint64_t{1} << 3;
// vector datatype support : sparse_float32
inline constexpr uint64_t SPARSE_FLOAT32 = uint64_t{1} << 4;

// This flag indicates that there is no need to create any index structure (build stage can be skipped)
inline constexpr uint64_t NO_TRAIN = uint64_t{1} << 16;
// This flag indicates that the index defaults to KNN search, meaning the recall is 100%
inline constexpr uint64_t KNN = uint64_t{1} << 17;
// This flag indicates that the index search stage will be performed on GPU (needs GPU devices)
inline constexpr uint64_t GPU = uint64_t{1} << 18;
// This flag indicates that the index supports using mmap to manage most of its memory, which can
// significantly improve capacity
inline constexpr uint64_t MMAP = uint64_t{1} << 19;
// This flag indicates that the index supports using a materialized view to accelerate filtered search
inline constexpr uint64_t MV = uint64_t{1} << 20;
// This flag indicates that the index needs disk during the search stage
inline constexpr uint64_t DISK = uint64_t{1} << 21;

// Masks grouping the data type flags above.
inline constexpr uint64_t ALL_TYPE = BINARY | FLOAT32 | FP16 | BF16 | SPARSE_FLOAT32;
inline constexpr uint64_t ALL_DENSE_TYPE = BINARY | FLOAT32 | FP16 | BF16;
inline constexpr uint64_t ALL_DENSE_FLOAT_TYPE = FLOAT32 | FP16 | BF16;

// Ready-made feature combinations.
inline constexpr uint64_t NO_TRAIN_INDEX = NO_TRAIN;
inline constexpr uint64_t GPU_KNN_FLOAT_INDEX = FLOAT32 | GPU | KNN;
inline constexpr uint64_t GPU_ANN_FLOAT_INDEX = FLOAT32 | GPU;
}  // namespace knowhere::feature
#endif /* FEATURE_H */
4 changes: 3 additions & 1 deletion include/knowhere/index/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,10 @@ class Index {
if (node == nullptr)
return;
node->DecRef();
if (!node->Ref())
if (!node->Ref()) {
delete node;
node = nullptr;
}
}

private:
Expand Down
78 changes: 67 additions & 11 deletions include/knowhere/index/index_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,15 @@ class IndexFactory {
Create(const std::string& name, const int32_t& version, const Object& object = nullptr);
template <typename DataType>
const IndexFactory&
Register(const std::string& name, std::function<Index<IndexNode>(const int32_t&, const Object&)> func);
Register(const std::string& name, std::function<Index<IndexNode>(const int32_t&, const Object&)> func,
const uint64_t features);
static IndexFactory&
Instance();
typedef std::tuple<std::set<std::pair<std::string, VecType>>, std::set<std::string>> GlobalIndexTable;
bool
FeatureCheck(const std::string& name, uint64_t feature) const;
static const std::map<std::string, uint64_t>&
GetIndexFeatures();
static GlobalIndexTable&
StaticIndexTableInstance();

Expand All @@ -48,47 +53,98 @@ class IndexFactory {
std::function<T1(const int32_t&, const Object&)> fun_value;
};
typedef std::map<std::string, std::unique_ptr<FunMapValueBase>> FuncMap;
typedef std::map<std::string, uint64_t> FeatureMap;
IndexFactory();
static FuncMap&
MapInstance();
static FeatureMap&
FeatureMapInstance();
};

#define KNOWHERE_CONCAT(x, y) index_factory_ref_##x##y
// Defines a const IndexFactory& variable named via KNOWHERE_CONCAT(name, data_type).
// When `condition` is true its initializer registers `func` under the stringified `name` for
// `data_type`, passing `features` along; otherwise it only binds to IndexFactory::Instance()
// without registering anything.
#define KNOWHERE_REGISTER_GLOBAL(name, func, data_type, condition, features) \
const IndexFactory& KNOWHERE_CONCAT(name, data_type) = \
condition ? IndexFactory::Instance().Register<data_type>(#name, func, features) : IndexFactory::Instance();

// Convenience form of KNOWHERE_REGISTER_GLOBAL that uses typeCheck<data_type>(features) as the
// registration condition, so `func` is registered only when `features` covers `data_type`.
#define KNOWHERE_REGISTER_FUNC_GLOBAL(name, func, data_type, features) \
KNOWHERE_REGISTER_GLOBAL(name, func, data_type, typeCheck<data_type>(features), features)

#define KNOWHERE_FACTOR_CONCAT(x, y) index_factory_ref_##x##y
#define KNOWHERE_STATIC_CONCAT(x, y) index_static_ref_##x##y

#define KNOWHERE_REGISTER_STATIC(name, index_node, data_type, ...) \
const IndexStaticFaced<data_type>& KNOWHERE_STATIC_CONCAT(name, data_type) = \
IndexStaticFaced<data_type>::Instance().RegisterStaticFunc<index_node<data_type, ##__VA_ARGS__>>(#name);

#define KNOWHERE_REGISTER_GLOBAL(name, func, data_type) \
const IndexFactory& KNOWHERE_FACTOR_CONCAT(name, data_type) = \
IndexFactory::Instance().Register<data_type>(#name, func)

#define KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, data_type, ...) \
#define KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, data_type, features, ...) \
KNOWHERE_REGISTER_STATIC(name, index_node, data_type, ##__VA_ARGS__) \
KNOWHERE_REGISTER_GLOBAL( \
name, \
(static_cast<Index<index_node<data_type, ##__VA_ARGS__>> (*)(const int32_t&, const Object&)>( \
&Index<index_node<data_type, ##__VA_ARGS__>>::Create)), \
data_type)
#define KNOWHERE_MOCK_REGISTER_GLOBAL(name, index_node, data_type, ...) \
data_type, typeCheck<data_type>(features), features)

#define KNOWHERE_MOCK_REGISTER_GLOBAL(name, index_node, data_type, features, ...) \
KNOWHERE_REGISTER_STATIC(name, index_node, data_type, ##__VA_ARGS__) \
KNOWHERE_REGISTER_GLOBAL( \
name, \
[](const int32_t& version, const Object& object) { \
return (Index<IndexNodeDataMockWrapper<data_type>>::Create( \
std::make_unique<index_node<MockData<data_type>::type, ##__VA_ARGS__>>(version, object))); \
}, \
data_type)
#define KNOWHERE_REGISTER_GLOBAL_WITH_THREAD_POOL(name, index_node, data_type, thread_size) \
data_type, typeCheck<data_type>(features), features)

// Expands to one KNOWHERE_SIMPLE_REGISTER_GLOBAL per data type (bin1, bf16, fp16, fp32), each with
// knowhere::feature::ALL_TYPE OR-ed into the caller-supplied `features`. Extra arguments are
// forwarded unchanged through ##__VA_ARGS__.
#define KNOWHERE_SIMPLE_REGISTER_ALL_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bin1, (features | knowhere::feature::ALL_TYPE), ##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bf16, (features | knowhere::feature::ALL_TYPE), ##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp16, (features | knowhere::feature::ALL_TYPE), ##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::ALL_TYPE), ##__VA_ARGS__);

// Expands to a single KNOWHERE_SIMPLE_REGISTER_GLOBAL for the fp32 data type with
// knowhere::feature::SPARSE_FLOAT32 OR-ed into the caller-supplied `features`.
#define KNOWHERE_SIMPLE_REGISTER_SPARSE_FLOAT_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::SPARSE_FLOAT32), \
##__VA_ARGS__);

// Expands to one KNOWHERE_SIMPLE_REGISTER_GLOBAL per dense data type (bin1, bf16, fp16, fp32), each
// with knowhere::feature::ALL_DENSE_TYPE OR-ed into the caller-supplied `features`.
#define KNOWHERE_SIMPLE_REGISTER_DENSE_ALL_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bin1, (features | knowhere::feature::ALL_DENSE_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bf16, (features | knowhere::feature::ALL_DENSE_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp16, (features | knowhere::feature::ALL_DENSE_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::ALL_DENSE_TYPE), \
##__VA_ARGS__);

// Expands to a single KNOWHERE_SIMPLE_REGISTER_GLOBAL for the bin1 data type with
// knowhere::feature::BINARY OR-ed into the caller-supplied `features`.
#define KNOWHERE_SIMPLE_REGISTER_DENSE_BIN_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bin1, (features | knowhere::feature::BINARY), ##__VA_ARGS__);

// Expands to a single KNOWHERE_SIMPLE_REGISTER_GLOBAL for the fp32 data type with
// knowhere::feature::FLOAT32 OR-ed into the caller-supplied `features`.
#define KNOWHERE_SIMPLE_REGISTER_DENSE_FLOAT32_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::FLOAT32), ##__VA_ARGS__);

// Expands to one KNOWHERE_SIMPLE_REGISTER_GLOBAL per dense floating-point data type (bf16, fp16,
// fp32), each with knowhere::feature::ALL_DENSE_FLOAT_TYPE OR-ed into the caller-supplied `features`.
#define KNOWHERE_SIMPLE_REGISTER_DENSE_FLOAT_ALL_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bf16, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp16, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__);

// Registers `index_node` for bf16, fp16 and fp32, each with knowhere::feature::ALL_DENSE_FLOAT_TYPE
// OR-ed into the caller-supplied `features`. Note the asymmetry: bf16 and fp16 go through
// KNOWHERE_MOCK_REGISTER_GLOBAL, while fp32 goes through KNOWHERE_SIMPLE_REGISTER_GLOBAL.
#define KNOWHERE_MOCK_REGISTER_DENSE_FLOAT_ALL_GLOBAL(name, index_node, features, ...) \
KNOWHERE_MOCK_REGISTER_GLOBAL(name, index_node, bf16, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__); \
KNOWHERE_MOCK_REGISTER_GLOBAL(name, index_node, fp16, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__);

#define KNOWHERE_REGISTER_GLOBAL_WITH_THREAD_POOL(name, index_node, data_type, features, thread_size) \
KNOWHERE_REGISTER_STATIC(name, index_node, data_type) \
KNOWHERE_REGISTER_GLOBAL( \
name, \
[](const int32_t& version, const Object& object) { \
return (Index<IndexNodeThreadPoolWrapper>::Create( \
std::make_unique<index_node<MockData<data_type>::type>>(version, object), thread_size)); \
}, \
data_type)
data_type, typeCheck<data_type>(features), features)
#define KNOWHERE_SET_STATIC_GLOBAL_INDEX_TABLE(table_index, name, index_table) \
static int name = []() -> int { \
auto& static_index_table = std::get<table_index>(IndexFactory::StaticIndexTableInstance()); \
Expand Down
14 changes: 14 additions & 0 deletions include/knowhere/index/index_static.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ struct Resource {
DEFINE_HAS_STATIC_FUNC(StaticCreateConfig)
DEFINE_HAS_STATIC_FUNC(StaticEstimateLoadResource)
DEFINE_HAS_STATIC_FUNC(StaticHasRawData)
DEFINE_HAS_STATIC_FUNC(StaticConfigCheck)

template <typename DataType>
class IndexStaticFaced {
Expand All @@ -61,6 +62,10 @@ class IndexStaticFaced {
static std::unique_ptr<BaseConfig>
CreateConfig(const knowhere::IndexType& indexType, const knowhere::IndexVersion& version);

static knowhere::Status
ConfigCheck(const knowhere::IndexType& indexType, const knowhere::IndexVersion& version,
const knowhere::Json& params, std::string& msg);

/**
* @brief estimate the memory and disk resource usage before index loading by index params
* @param indexType vector index type (HNSW, IVFFLAT, etc)
Expand Down Expand Up @@ -103,6 +108,11 @@ class IndexStaticFaced {
staticHasRawDataMap[indexType] = VecIndexNode::StaticHasRawData;
}

if constexpr (has_static_StaticConfigCheck<VecIndexNode,
decltype(IndexStaticFaced<DataType>::InternalConfigCheck)>::value) {
staticConfigCheckMap[indexType] = VecIndexNode::StaticConfigCheck;
}

return Instance();
}

Expand All @@ -117,12 +127,16 @@ class IndexStaticFaced {
static bool
InternalStaticHasRawData(const knowhere::BaseConfig& config, const IndexVersion& version);

static knowhere::Status
InternalConfigCheck(const knowhere::BaseConfig& config, const IndexVersion& version, std::string& msg);

static std::unique_ptr<BaseConfig>
InternalStaticCreateConfig();

std::map<std::string, std::function<decltype(InternalStaticCreateConfig)>> staticCreateConfigMap;
std::map<std::string, std::function<decltype(InternalStaticHasRawData)>> staticHasRawDataMap;
std::map<std::string, std::function<decltype(InternalEstimateLoadResource)>> staticEstimateLoadResourceMap;
std::map<std::string, std::function<decltype(InternalConfigCheck)>> staticConfigCheckMap;
};

} // namespace knowhere
Expand Down
21 changes: 21 additions & 0 deletions include/knowhere/operands.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include <cstdint>
#include <cstring>

#include "feature.h"

namespace {
union fp32_bits {
uint32_t as_bits;
Expand Down Expand Up @@ -143,6 +145,25 @@ struct bf16 {
}
};

template <typename T>
bool
typeCheck(uint64_t features) {
if constexpr (std::is_same_v<T, bin1>) {
return features & knowhere::feature::BINARY;
}
if constexpr (std::is_same_v<T, fp16>) {
return features & knowhere::feature::FP16;
}
if constexpr (std::is_same_v<T, bf16>) {
return features & knowhere::feature::BF16;
}
// TODO : add sparse_fp32 data type
if constexpr (std::is_same_v<T, fp32>) {
return (features & knowhere::feature::FLOAT32) || (features & knowhere::feature::SPARSE_FLOAT32);
}
return false;
}

template <typename InType, typename... Types>
using TypeMatch = std::bool_constant<(... | std::is_same_v<InType, Types>)>;
template <typename InType>
Expand Down
1 change: 1 addition & 0 deletions include/knowhere/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <strings.h>

#include <algorithm>
#include <map>
#include <vector>

#include "knowhere/binaryset.h"
Expand Down
Loading

0 comments on commit 0f281f7

Please sign in to comment.