diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index e30d79ecb0e7..9fb772e1e6f9 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -1262,13 +1262,13 @@ TEST_F(MatchingTest, require_that_docsum_matcher_can_extract_matching_elements_f using FMA = vespalib::FuzzyMatchingAlgorithm; -struct AttributeBlueprintParamsFixture { +struct CreateBlueprintParamsFixture { BlueprintFactory factory; search::fef::test::IndexEnvironment index_env; RankSetup rank_setup; Properties rank_properties; - AttributeBlueprintParamsFixture(double lower_limit, double upper_limit, double target_hits_max_adjustment_factor, - FMA fuzzy_matching_algorithm) + CreateBlueprintParamsFixture(double lower_limit, double upper_limit, double target_hits_max_adjustment_factor, + FMA fuzzy_matching_algorithm) : factory(), index_env(), rank_setup(factory, index_env), @@ -1281,44 +1281,49 @@ struct AttributeBlueprintParamsFixture { } void set_query_properties(std::string_view lower_limit, std::string_view upper_limit, std::string_view target_hits_max_adjustment_factor, - const std::string & fuzzy_matching_algorithm) { + std::string_view fuzzy_matching_algorithm, + std::string_view disk_index_bitvector_limit) { rank_properties.add(GlobalFilterLowerLimit::NAME, lower_limit); rank_properties.add(GlobalFilterUpperLimit::NAME, upper_limit); rank_properties.add(TargetHitsMaxAdjustmentFactor::NAME, target_hits_max_adjustment_factor); rank_properties.add(FuzzyAlgorithm::NAME, fuzzy_matching_algorithm); + rank_properties.add(DiskIndexBitvectorLimit::NAME, disk_index_bitvector_limit); } - ~AttributeBlueprintParamsFixture(); + ~CreateBlueprintParamsFixture(); CreateBlueprintParams extract(uint32_t active_docids = 9, uint32_t docid_limit = 10) const { return MatchToolsFactory::extract_create_blueprint_params(rank_setup, rank_properties, active_docids, docid_limit); } 
}; -AttributeBlueprintParamsFixture::~AttributeBlueprintParamsFixture() = default; +CreateBlueprintParamsFixture::~CreateBlueprintParamsFixture() = default; -TEST_F(MatchingTest, attribute_blueprint_params_are_extracted_from_rank_profile) +TEST_F(MatchingTest, create_blueprint_params_are_extracted_from_rank_profile) { - AttributeBlueprintParamsFixture f(0.2, 0.8, 5.0, FMA::DfaTable); + CreateBlueprintParamsFixture f(0.2, 0.8, 5.0, FMA::DfaTable); + f.rank_setup.set_disk_index_bitvector_limit(0.04); auto params = f.extract(); EXPECT_EQ(0.2, params.global_filter_lower_limit); EXPECT_EQ(0.8, params.global_filter_upper_limit); EXPECT_EQ(5.0, params.target_hits_max_adjustment_factor); EXPECT_EQ(FMA::DfaTable, params.fuzzy_matching_algorithm); + EXPECT_EQ(0.04, params.disk_index_bitvector_limit); } -TEST_F(MatchingTest, attribute_blueprint_params_are_extracted_from_query) +TEST_F(MatchingTest, create_blueprint_params_are_extracted_from_query) { - AttributeBlueprintParamsFixture f(0.2, 0.8, 5.0, FMA::DfaTable); - f.set_query_properties("0.15", "0.75", "3.0", "dfa_explicit"); + CreateBlueprintParamsFixture f(0.2, 0.8, 5.0, FMA::DfaTable); + f.set_query_properties("0.15", "0.75", "3.0", "dfa_explicit", "0.02"); auto params = f.extract(); EXPECT_EQ(0.15, params.global_filter_lower_limit); EXPECT_EQ(0.75, params.global_filter_upper_limit); EXPECT_EQ(3.0, params.target_hits_max_adjustment_factor); EXPECT_EQ(FMA::DfaExplicit, params.fuzzy_matching_algorithm); + EXPECT_EQ(0.02, params.disk_index_bitvector_limit); } TEST_F(MatchingTest, global_filter_params_are_scaled_with_active_hit_ratio) { - AttributeBlueprintParamsFixture f(0.2, 0.8, 5.0, FMA::DfaTable); + CreateBlueprintParamsFixture f(0.2, 0.8, 5.0, FMA::DfaTable); auto params = f.extract(5, 10); EXPECT_EQ(0.12, params.global_filter_lower_limit); EXPECT_EQ(0.48, params.global_filter_upper_limit); @@ -1326,7 +1331,7 @@ TEST_F(MatchingTest, global_filter_params_are_scaled_with_active_hit_ratio) TEST_F(MatchingTest, 
weak_and_stop_word_strategy_is_resolved_correctly) { - AttributeBlueprintParamsFixture f(0.2, 0.8, 5.0, FMA::DfaTable); + CreateBlueprintParamsFixture f(0.2, 0.8, 5.0, FMA::DfaTable); EXPECT_EQ(WeakAndStopWordAdjustLimit::DEFAULT_VALUE, 1.0); EXPECT_EQ(WeakAndStopWordDropLimit::DEFAULT_VALUE, 1.0); EXPECT_EQ(f.rank_setup.get_weakand_stop_word_adjust_limit(), 1.0); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 013bf4b79f0a..66de47157879 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -356,6 +356,7 @@ MatchToolsFactory::extract_create_blueprint_params(const RankSetup& rank_setup, double weakand_range = temporary::WeakAndRange::lookup(rank_properties, rank_setup.get_weakand_range()); double weakand_stop_word_adjust_limit = WeakAndStopWordAdjustLimit::lookup(rank_properties, rank_setup.get_weakand_stop_word_adjust_limit()); double weakand_stop_word_drop_limit = WeakAndStopWordDropLimit::lookup(rank_properties, rank_setup.get_weakand_stop_word_drop_limit()); + double disk_index_bitvector_limit = DiskIndexBitvectorLimit::lookup(rank_properties, rank_setup.get_disk_index_bitvector_limit()); // Note that we count the reserved docid 0 as active. // This ensures that when searchable-copies=1, the ratio is 1.0. 
@@ -367,7 +368,8 @@ MatchToolsFactory::extract_create_blueprint_params(const RankSetup& rank_setup, fuzzy_matching_algorithm, weakand_range, StopWordStrategy(weakand_stop_word_adjust_limit, - weakand_stop_word_drop_limit, docid_limit)}; + weakand_stop_word_drop_limit, docid_limit), + disk_index_bitvector_limit}; } AttributeOperationTask::AttributeOperationTask(const RequestContext & requestContext, diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp index 97e1d00577ad..61b758ebb402 100644 --- a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp +++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp @@ -142,6 +142,7 @@ class DiskIndexTest : public ::testing::Test, public TestDiskIndex { void test_io_settings(const IOSettings& io_settings); SimpleResult search(const FieldIndex& field_index, const DictionaryLookupResult& lookup_result, const PostingListHandle& handle); + Blueprint::UP create_blueprint(const FieldSpec& field, const search::query::Node& term, uint32_t docid_limit=1000); }; DiskIndexTest::DiskIndexTest() = default; @@ -256,6 +257,14 @@ DiskIndexTest::search(const FieldIndex& field_index, const DictionaryLookupResul return SimpleResult().search(*sb); } +Blueprint::UP +DiskIndexTest::create_blueprint(const FieldSpec& field, const search::query::Node& term, uint32_t docid_limit) +{ + auto b = _index->createBlueprint(_requestContext, field, term); + b->basic_plan(true, docid_limit); + b->fetchPostings(search::queryeval::ExecuteInfo::FULL); + return b; +} void DiskIndexTest::requireThatWeCanReadPostingList(const IOSettings& io_settings) @@ -327,28 +336,23 @@ void DiskIndexTest::requireThatBlueprintIsCreated() { { // unknown field - Blueprint::UP b = - _index->createBlueprint(_requestContext, FieldSpec("none", 0, 0), makeTerm("w1")); - EXPECT_TRUE(dynamic_cast(b.get()) != NULL); + auto b = _index->createBlueprint(_requestContext, FieldSpec("none", 0, 0), 
makeTerm("w1")); + EXPECT_TRUE(dynamic_cast(b.get()) != nullptr); } { // unknown word - Blueprint::UP b = - _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("none")); - EXPECT_TRUE(dynamic_cast(b.get()) != NULL); + auto b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("none")); + EXPECT_TRUE(dynamic_cast(b.get()) != nullptr); } { // known field & word with hits - Blueprint::UP b = - _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w1")); - EXPECT_TRUE(dynamic_cast(b.get()) != NULL); + auto b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w1")); + EXPECT_TRUE(dynamic_cast(b.get()) != nullptr); EXPECT_EQ(2u, b->getState().estimate().estHits); EXPECT_TRUE(!b->getState().estimate().empty); } { // known field & word without hits - Blueprint::UP b = - _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w2")); -// std::cerr << "BP = " << typeid(*b).name() << std::endl; - EXPECT_TRUE((dynamic_cast(b.get()) != NULL) || - (dynamic_cast(b.get()) != NULL)); + auto b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w2")); + EXPECT_TRUE((dynamic_cast(b.get()) != nullptr) || + (dynamic_cast(b.get()) != nullptr)); EXPECT_EQ(0u, b->getState().estimate().estHits); EXPECT_TRUE(b->getState().estimate().empty); } @@ -366,53 +370,64 @@ DiskIndexTest::requireThatBlueprintCanCreateSearchIterators() SimpleResult result_f1_w2; SimpleResult result_f2_w2({1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17}); auto upper_bound = Blueprint::FilterConstraint::UPPER_BOUND; - { // bit vector due to isFilter - b = _index->createBlueprint(_requestContext, FieldSpec("f2", 0, 0, true), makeTerm("w2")); - b->basic_plan(true, 1000); - b->fetchPostings(search::queryeval::ExecuteInfo::FULL); + { // bitvector due to is_filter_field=true + b = create_blueprint(FieldSpec("f2", 0, 0, true), makeTerm("w2")); auto& leaf_b = dynamic_cast(*b); s = 
leaf_b.createLeafSearch(mda); - EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + EXPECT_TRUE(dynamic_cast(s.get()) != nullptr); EXPECT_EQ(result_f2_w2, SimpleResult().search(*s)); EXPECT_EQ(result_f2_w2, SimpleResult().search(*leaf_b.createFilterSearch(upper_bound))); } - { // bit vector due to no ranking needed - b = _index->createBlueprint(_requestContext, FieldSpec("f2", 0, 0, false), makeTerm("w2")); - b->basic_plan(true, 1000); - b->fetchPostings(ExecuteInfo::FULL); + { // bitvector due to no ranking needed + b = create_blueprint(FieldSpec("f2", 0, 0, false), makeTerm("w2")); auto& leaf_b = dynamic_cast(*b); s = leaf_b.createLeafSearch(mda); - EXPECT_FALSE(dynamic_cast(s.get()) != NULL); + EXPECT_FALSE(dynamic_cast(s.get()) != nullptr); TermFieldMatchData md2; md2.tagAsNotNeeded(); TermFieldMatchDataArray mda2; mda2.add(&md2); EXPECT_TRUE(mda2[0]->isNotNeeded()); s = (dynamic_cast(b.get()))->createLeafSearch(mda2); - EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + EXPECT_TRUE(dynamic_cast(s.get()) != nullptr); EXPECT_EQ(result_f2_w2, SimpleResult().search(*s)); EXPECT_EQ(result_f2_w2, SimpleResult().search(*leaf_b.createFilterSearch(upper_bound))); } - { // fake bit vector - b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0, true), makeTerm("w2")); -// std::cerr << "BP = " << typeid(*b).name() << std::endl; - b->basic_plan(true, 1000); - b->fetchPostings(ExecuteInfo::FULL); + { // fake bitvector (wrapping posocc iterator) + b = create_blueprint(FieldSpec("f1", 0, 0, true), makeTerm("w1")); auto& leaf_b = dynamic_cast(*b); s = leaf_b.createLeafSearch(mda); -// std::cerr << "SI = " << typeid(*s).name() << std::endl; - EXPECT_TRUE((dynamic_cast(s.get()) != NULL) || - dynamic_cast(s.get())); - EXPECT_EQ(result_f1_w2, SimpleResult().search(*s)); - EXPECT_EQ(result_f1_w2, SimpleResult().search(*leaf_b.createFilterSearch(upper_bound))); + EXPECT_TRUE(dynamic_cast(s.get()) != nullptr); + EXPECT_EQ(result_f1_w1, SimpleResult().search(*s)); + 
EXPECT_EQ(result_f1_w1, SimpleResult().search(*leaf_b.createFilterSearch(upper_bound))); } { // posting list iterator - b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w1")); - b->basic_plan(true, 1000); - b->fetchPostings(ExecuteInfo::FULL); + b = create_blueprint(FieldSpec("f1", 0, 0), makeTerm("w1")); + auto& leaf_b = dynamic_cast(*b); + s = leaf_b.createLeafSearch(mda); + ASSERT_TRUE((dynamic_cast *>(s.get()) != nullptr)); + EXPECT_EQ(result_f1_w1, SimpleResult().search(*s)); + EXPECT_EQ(result_f1_w1, SimpleResult().search(*leaf_b.createFilterSearch(upper_bound))); + } + { // bitvector used due to bitvector_limit set. + // The term 'w2' hits 17 docs in field 'f2' (bitvector for term exists). + double bitvector_limit = 16.0 / 100.0; + _requestContext.get_create_blueprint_params().disk_index_bitvector_limit = bitvector_limit; + b = create_blueprint(FieldSpec("f2", 0, 0, false), makeTerm("w2"), 100); + auto& leaf_b = dynamic_cast(*b); + s = leaf_b.createLeafSearch(mda); + EXPECT_TRUE(dynamic_cast(s.get()) != nullptr); + EXPECT_EQ(result_f2_w2, SimpleResult().search(*s)); + EXPECT_EQ(result_f2_w2, SimpleResult().search(*leaf_b.createFilterSearch(upper_bound))); + } + { // fake bitvector (wrapping posocc iterator) used due to bitvector_limit set. + // The term 'w1' hits 2 docs in field 'f1' (bitvector for term doesn't exist). 
+ double bitvector_limit = 1.0 / 100.0; + _requestContext.get_create_blueprint_params().disk_index_bitvector_limit = bitvector_limit; + b = create_blueprint(FieldSpec("f1", 0, 0, false), makeTerm("w1"), 100); auto& leaf_b = dynamic_cast(*b); s = leaf_b.createLeafSearch(mda); - ASSERT_TRUE((dynamic_cast *>(s.get()) != NULL)); + EXPECT_TRUE((dynamic_cast(s.get()) != nullptr)); EXPECT_EQ(result_f1_w1, SimpleResult().search(*s)); EXPECT_EQ(result_f1_w1, SimpleResult().search(*leaf_b.createFilterSearch(upper_bound))); } @@ -490,7 +505,7 @@ DiskIndexTest::test_io_settings(const IOSettings& io_settings) ASSERT_TRUE(posting_list_cache); auto stats = posting_list_cache->get_stats(); EXPECT_EQ(2, stats.misses); - EXPECT_EQ(1, stats.hits); + EXPECT_EQ(3, stats.hits); } else { ASSERT_FALSE(posting_list_cache); } diff --git a/searchlib/src/tests/ranksetup/ranksetup_test.cpp b/searchlib/src/tests/ranksetup/ranksetup_test.cpp index a1339df74f80..87eeddf2771f 100644 --- a/searchlib/src/tests/ranksetup/ranksetup_test.cpp +++ b/searchlib/src/tests/ranksetup/ranksetup_test.cpp @@ -563,6 +563,7 @@ TEST_F(RankSetupTest, rank_setup) env.getProperties().add(matching::FuzzyAlgorithm::NAME, "dfa_implicit"); env.getProperties().add(matching::WeakAndStopWordAdjustLimit::NAME, "0.05"); env.getProperties().add(matching::WeakAndStopWordDropLimit::NAME, "0.5"); + env.getProperties().add(matching::DiskIndexBitvectorLimit::NAME, "0.04"); RankSetup rs(_factory, env); EXPECT_FALSE(rs.has_match_features()); @@ -608,6 +609,7 @@ TEST_F(RankSetupTest, rank_setup) EXPECT_EQ(rs.get_fuzzy_matching_algorithm(), vespalib::FuzzyMatchingAlgorithm::DfaImplicit); EXPECT_EQ(rs.get_weakand_stop_word_adjust_limit(), 0.05); EXPECT_EQ(rs.get_weakand_stop_word_drop_limit(), 0.5); + EXPECT_EQ(rs.get_disk_index_bitvector_limit(), 0.04); } bool diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp index 9c620d6f932c..8da9aff14a08 100644 --- 
a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp @@ -5,14 +5,16 @@ #include "fileheader.h" #include "pagedict4randread.h" #include +#include #include -#include #include +#include +#include #include #include -#include -#include #include +#include +#include #include #include @@ -310,8 +312,9 @@ class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper { const std::string termStr = termAsString(n); const DiskIndex::LookupResult & lookupRes = _cache.lookup(termStr, _fieldId); if (lookupRes.valid()) { - bool useBitVector = _field.isFilter(); - setResult(std::make_unique(_field, _diskIndex.get_field_index(_fieldId), termStr, lookupRes, useBitVector)); + double bitvector_limit = getRequestContext().get_create_blueprint_params().disk_index_bitvector_limit; + setResult(std::make_unique + (_field, _diskIndex.get_field_index(_fieldId), termStr, lookupRes, _field.isFilter(), bitvector_limit)); } else { setResult(std::make_unique(_field)); } diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp index d83d48fced41..4e2d05778d3b 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp @@ -42,14 +42,16 @@ DiskTermBlueprint::DiskTermBlueprint(const FieldSpec & field, const FieldIndex& field_index, const std::string& query_term, DictionaryLookupResult lookupRes, - bool useBitVector) + bool is_filter_field, + double bitvector_limit) : SimpleLeafBlueprint(field), _field(field), _field_index(field_index), _query_term(query_term), _lookupRes(std::move(lookupRes)), _bitvector_lookup_result(_field_index.lookup_bit_vector(_lookupRes)), - _useBitVector(useBitVector), + _is_filter_field(is_filter_field), + _bitvector_limit(bitvector_limit), _fetchPostingsDone(false), _postingHandle(), _bitVector(), @@ -90,7 +92,7 @@ 
DiskTermBlueprint::fetchPostings(const queryeval::ExecuteInfo &execInfo) { (void) execInfo; if (!_fetchPostingsDone) { - if (_useBitVector && _bitvector_lookup_result.valid()) { + if (use_bitvector() && _bitvector_lookup_result.valid()) { if (LOG_WOULD_LOG(debug)) [[unlikely]] { log_bitvector_read(); } @@ -113,6 +115,13 @@ DiskTermBlueprint::calculate_flow_stats(uint32_t docid_limit) const return {rel_est, disk_index_cost(rel_est), disk_index_strict_cost(rel_est)}; } +bool +DiskTermBlueprint::use_bitvector() const +{ + return _is_filter_field || + ((get_docid_limit() > 0) && ((double)_lookupRes.counts._numDocs / (double)get_docid_limit()) > _bitvector_limit); +} + const BitVector * DiskTermBlueprint::get_bitvector() const { @@ -133,13 +142,13 @@ DiskTermBlueprint::get_bitvector() const SearchIterator::UP DiskTermBlueprint::createLeafSearch(const TermFieldMatchDataArray & tfmda) const { - if (_bitvector_lookup_result.valid() && (_useBitVector || tfmda[0]->isNotNeeded())) { + if (_bitvector_lookup_result.valid() && (_bitVector || tfmda[0]->isNotNeeded())) { LOG(debug, "Return BitVectorIterator: %s, wordNum(%" PRIu64 "), docCount(%" PRIu64 ")", getName(_field_index.get_field_id()).c_str(), _lookupRes.wordNum, _lookupRes.counts._numDocs); return BitVectorIterator::create(get_bitvector(), *tfmda[0], strict()); } auto search(_field_index.create_iterator(_lookupRes, _postingHandle, tfmda)); - if (_useBitVector) { + if (use_bitvector()) { LOG(debug, "Return BooleanMatchIteratorWrapper: %s, wordNum(%" PRIu64 "), docCount(%" PRIu64 ")", getName(_field_index.get_field_id()).c_str(), _lookupRes.wordNum, _lookupRes.counts._numDocs); return std::make_unique(std::move(search), tfmda); diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h index bffdb54df4c1..a35df216cd8f 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h +++ 
b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h @@ -15,16 +15,18 @@ class DiskTermBlueprint : public queryeval::SimpleLeafBlueprint private: queryeval::FieldSpec _field; const FieldIndex& _field_index; - std::string _query_term; - index::DictionaryLookupResult _lookupRes; + std::string _query_term; + index::DictionaryLookupResult _lookupRes; index::BitVectorDictionaryLookupResult _bitvector_lookup_result; - bool _useBitVector; + bool _is_filter_field; + double _bitvector_limit; bool _fetchPostingsDone; index::PostingListHandle _postingHandle; std::shared_ptr _bitVector; mutable std::mutex _mutex; mutable std::shared_ptr _late_bitvector; + bool use_bitvector() const; const BitVector* get_bitvector() const; void log_bitvector_read() const __attribute__((noinline)); void log_posting_list_read() const __attribute__((noinline)); @@ -32,16 +34,20 @@ class DiskTermBlueprint : public queryeval::SimpleLeafBlueprint /** * Create a new blueprint. * - * @param field the field to search in. - * @param field_index the field index used to read the bit vector or posting list. - * @param lookupRes the result after disk dictionary lookup. - * @param useBitVector whether or not we should use bit vector. + * @param field The field to search in. + * @param field_index The field index used to read the bit vector or posting list. + * @param lookupRes The result after disk dictionary lookup. + * @param is_filter_field Whether this is a filter field, in which case use of a bit vector is forced. + * @param bitvector_limit The limit on the term's hit ratio (number of matching documents divided by the docid limit) above which a bitvector is used for searching this term. + This can be used to tune performance at the cost of quality. + If no bitvector exists for the term, a fake bitvector wrapping the posocc iterator is used.
**/ DiskTermBlueprint(const queryeval::FieldSpec & field, const FieldIndex& field_index, const std::string& query_term, index::DictionaryLookupResult lookupRes, - bool useBitVector); + bool is_filter_field, + double bitvector_limit); queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override; diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index 6cb1ada99249..d7e09dc620d4 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -458,6 +458,13 @@ double WeakAndStopWordDropLimit::lookup(const Properties &props, double defaultV return lookupDouble(props, NAME, defaultValue); } +const std::string DiskIndexBitvectorLimit::NAME("vespa.matching.diskindex.bitvector_limit"); +const double DiskIndexBitvectorLimit::DEFAULT_VALUE(1.0); +double DiskIndexBitvectorLimit::lookup(const Properties& props) { return lookup(props, DEFAULT_VALUE); } +double DiskIndexBitvectorLimit::lookup(const Properties& props, double default_value) { + return lookupDouble(props, NAME, default_value); +} + const std::string TargetHitsMaxAdjustmentFactor::NAME("vespa.matching.nns.target_hits_max_adjustment_factor"); const double TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE(20.0); diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 4095cfa39573..1d04ed0021e1 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -363,6 +363,17 @@ namespace matching { static double lookup(const Properties &props, double defaultValue); }; + /** + * Use bitvector posting lists for terms searched in disk indexes that match more than this fraction of the corpus. + * If a bitvector is not available for the term, wrap the posocc posting list iterator so that it acts as a bitvector iterator.
+ **/ + struct DiskIndexBitvectorLimit { + static const std::string NAME; + static const double DEFAULT_VALUE; + static double lookup(const Properties& props); + static double lookup(const Properties& props, double default_value); + }; + /** * Property to control the algorithm using for fuzzy matching. **/ diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index 3c225e65df9e..ab0fda87dd38 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -77,6 +77,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _weakand_range(0.0), _weakand_stop_word_adjust_limit(matching::WeakAndStopWordAdjustLimit::DEFAULT_VALUE), _weakand_stop_word_drop_limit(matching::WeakAndStopWordDropLimit::DEFAULT_VALUE), + _disk_index_bitvector_limit(matching::DiskIndexBitvectorLimit::DEFAULT_VALUE), _fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm::DfaTable), _mutateOnMatch(), _mutateOnFirstPhase(), @@ -136,6 +137,7 @@ RankSetup::configure() set_weakand_range(temporary::WeakAndRange::lookup(_indexEnv.getProperties())); set_weakand_stop_word_adjust_limit(matching::WeakAndStopWordAdjustLimit::lookup(_indexEnv.getProperties())); set_weakand_stop_word_drop_limit(matching::WeakAndStopWordDropLimit::lookup(_indexEnv.getProperties())); + set_disk_index_bitvector_limit(matching::DiskIndexBitvectorLimit::lookup(_indexEnv.getProperties())); _mutateOnMatch._attribute = mutate::on_match::Attribute::lookup(_indexEnv.getProperties()); _mutateOnMatch._operation = mutate::on_match::Operation::lookup(_indexEnv.getProperties()); _mutateOnFirstPhase._attribute = mutate::on_first_phase::Attribute::lookup(_indexEnv.getProperties()); diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index f0c9ff798893..969fe3c7dac2 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ 
b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -86,6 +86,7 @@ class RankSetup double _weakand_range; double _weakand_stop_word_adjust_limit; double _weakand_stop_word_drop_limit; + double _disk_index_bitvector_limit; vespalib::FuzzyMatchingAlgorithm _fuzzy_matching_algorithm; MutateOperation _mutateOnMatch; MutateOperation _mutateOnFirstPhase; @@ -418,6 +419,8 @@ class RankSetup double get_weakand_stop_word_adjust_limit() const { return _weakand_stop_word_adjust_limit; } void set_weakand_stop_word_drop_limit(double v) { _weakand_stop_word_drop_limit = v; } double get_weakand_stop_word_drop_limit() const { return _weakand_stop_word_drop_limit; } + void set_disk_index_bitvector_limit(double v) { _disk_index_bitvector_limit = v; } + double get_disk_index_bitvector_limit() const { return _disk_index_bitvector_limit; } /** * This method may be used to indicate that certain features diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_params.h b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_params.h index 20f095728be1..195f50a913e5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_params.h +++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_params.h @@ -19,19 +19,22 @@ struct CreateBlueprintParams vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm; double weakand_range; queryeval::wand::StopWordStrategy weakand_stop_word_strategy; + double disk_index_bitvector_limit; CreateBlueprintParams(double global_filter_lower_limit_in, double global_filter_upper_limit_in, double target_hits_max_adjustment_factor_in, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in, double weakand_range_in, - queryeval::wand::StopWordStrategy weakand_stop_word_strategy_in) + queryeval::wand::StopWordStrategy weakand_stop_word_strategy_in, + double disk_index_bitvector_limit_in) : global_filter_lower_limit(global_filter_lower_limit_in), global_filter_upper_limit(global_filter_upper_limit_in), 
target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in), fuzzy_matching_algorithm(fuzzy_matching_algorithm_in), weakand_range(weakand_range_in), - weakand_stop_word_strategy(weakand_stop_word_strategy_in) + weakand_stop_word_strategy(weakand_stop_word_strategy_in), + disk_index_bitvector_limit(disk_index_bitvector_limit_in) { } @@ -41,7 +44,8 @@ struct CreateBlueprintParams fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE, fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE, fef::indexproperties::temporary::WeakAndRange::DEFAULT_VALUE, - queryeval::wand::StopWordStrategy::none()) + queryeval::wand::StopWordStrategy::none(), + fef::indexproperties::matching::DiskIndexBitvectorLimit::DEFAULT_VALUE) { } }; diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h index ee09974b4473..38f4f9df3f49 100644 --- a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h +++ b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h @@ -50,6 +50,8 @@ class FakeRequestContext : public IRequestContext const CreateBlueprintParams& get_create_blueprint_params() const override; const MetaStoreReadGuardSP * getMetaStoreReadGuard() const override { return nullptr; } + + CreateBlueprintParams& get_create_blueprint_params() { return _create_blueprint_params; } private: std::unique_ptr _clock; const vespalib::Doom _doom;