From e56c85838211b4f2c83dcbaae9851ef8e1ce0cb4 Mon Sep 17 00:00:00 2001 From: Mikhail Iudin Date: Wed, 27 Nov 2024 22:19:03 +0100 Subject: [PATCH] GO-4472 try to use api --- rust/src/queries/convert.rs | 346 ++++++++++++++++++++++++++---------- searchquerybuilder.go | 2 +- tantivy_test.go | 10 +- test_jsons/data.json | 202 ++++++++++----------- 4 files changed, 362 insertions(+), 198 deletions(-) diff --git a/rust/src/queries/convert.rs b/rust/src/queries/convert.rs index 5e18fb2..e637728 100644 --- a/rust/src/queries/convert.rs +++ b/rust/src/queries/convert.rs @@ -134,98 +134,260 @@ pub fn parse_query_from_json( convert_to_tantivy(index, parsed, schema) } -#[test] -fn test_file_reading() { - let file_path = "../test_jsons/data.json"; - let contents = fs::read_to_string(file_path).expect("Failed to read file"); - - let expected: FinalQuery = FinalQuery { - texts: Vec::from(["term", "term2"].map(|t| t.to_string())), - fields: Vec::from( - ["body1", "body2", "body3", "title1", "title2", "title3"].map(|t| t.to_string()), - ), - query: BoolQuery { - subqueries: Vec::from([ - QueryElement { - query: Some(GoQuery::PhraseQuery { - field_index: 0, - text_index: 0, - boost: 1.0, - }), - modifier: QueryModifier::Must, - }, - QueryElement { - query: Some(GoQuery::PhrasePrefixQuery { - field_index: 1, - text_index: 0, - boost: 1.0, - }), - modifier: QueryModifier::Should, - }, - QueryElement { - query: Some(GoQuery::SingleTermPrefixQuery { - field_index: 2, - text_index: 0, - boost: 1.0, - }), - modifier: QueryModifier::MustNot, - }, - QueryElement { - query: Some(GoQuery::PhraseQuery { - field_index: 3, - text_index: 1, - boost: 0.1, - }), - modifier: QueryModifier::Must, - }, - QueryElement { - query: Some(GoQuery::PhrasePrefixQuery { - field_index: 4, - text_index: 1, - boost: 0.1, - }), - modifier: QueryModifier::Should, - }, - QueryElement { - query: Some(GoQuery::SingleTermPrefixQuery { - field_index: 5, - text_index: 1, - boost: 0.1, - }), - modifier: QueryModifier::MustNot, - }, - QueryElement { - query: Some(GoQuery::BoolQuery { - subqueries: Vec::from([ - QueryElement { - query: Some(GoQuery::PhrasePrefixQuery { - field_index: 0, - text_index: 0, - boost: 1.0, - }), - modifier: QueryModifier::Should, - }, - QueryElement { - query: Some(GoQuery::BoolQuery { - subqueries: Vec::from([QueryElement { - query: Some(GoQuery::PhraseQuery { - field_index: 0, - text_index: 0, - boost: 0.8, - }), - modifier: QueryModifier::Must, - }]), - }), - modifier: QueryModifier::Should, - }, - ]), - }), - modifier: QueryModifier::Must, - }, - ]), - }, +mod for_tests { + use crate::queries::GoQuery::BoolQuery; + use crate::queries::{FinalQuery, GoQuery, QueryElement, QueryModifier}; +} + +#[cfg(test)] +mod tests { + use crate::queries::convert::convert_to_tantivy; + use crate::queries::models::BoolQuery; + use crate::queries::{FinalQuery, GoQuery, QueryElement, QueryModifier}; + use std::fs; + use tantivy::query::PhrasePrefixQuery; + use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, STORED, TEXT}; + use tantivy::tokenizer::{ + AsciiFoldingFilter, Language, LowerCaser, RemoveLongFilter, SimpleTokenizer, Stemmer, + TextAnalyzer, }; - let parsed: FinalQuery = serde_json::from_str(&contents).expect("Json was not parsed"); + use tantivy::Index; + + fn expected_query() -> FinalQuery { + FinalQuery { + texts: vec!["some words", "term", "another term", "term2"] + .into_iter() + .map(|t| t.to_string()) + .collect(), + fields: vec!["body1", "body2", "body3", "title1", "title2", "title3"] + .into_iter() + .map(|t| t.to_string()) + .collect(), + query: BoolQuery { + subqueries: Vec::from([ + QueryElement { + query: Some(GoQuery::PhraseQuery { + field_index: 0, + text_index: 0, + boost: 1.0, + }), + modifier: QueryModifier::Must, + }, + QueryElement { + query: Some(GoQuery::PhrasePrefixQuery { + field_index: 1, + text_index: 1, + boost: 1.0, + }), + modifier: QueryModifier::Should, + }, + QueryElement { + query: Some(GoQuery::SingleTermPrefixQuery { + field_index: 2, + text_index: 1, + boost: 1.0, + }), + modifier: QueryModifier::MustNot, + }, + QueryElement { + query: Some(GoQuery::PhraseQuery { + field_index: 3, + text_index: 2, + boost: 0.1, + }), + modifier: QueryModifier::Must, + }, + QueryElement { + query: Some(GoQuery::PhrasePrefixQuery { + field_index: 4, + text_index: 3, + boost: 0.1, + }), + modifier: QueryModifier::Should, + }, + QueryElement { + query: Some(GoQuery::SingleTermPrefixQuery { + field_index: 5, + text_index: 3, + boost: 0.1, + }), + modifier: QueryModifier::MustNot, + }, + QueryElement { + query: Some(GoQuery::BoolQuery { + subqueries: Vec::from([ + QueryElement { + query: Some(GoQuery::PhrasePrefixQuery { + field_index: 0, + text_index: 0, + boost: 1.0, + }), + modifier: QueryModifier::Should, + }, + QueryElement { + query: Some(GoQuery::BoolQuery { + subqueries: Vec::from([QueryElement { + query: Some(GoQuery::PhraseQuery { + field_index: 0, + text_index: 0, + boost: 0.8, + }), + modifier: QueryModifier::Must, + }]), + }), + modifier: QueryModifier::Should, + }, + ]), + }), + modifier: QueryModifier::Must, + }, + ]), + }, + } + } + + #[test] + fn test_file_reading() { + let file_path = "../test_jsons/data.json"; + let contents = fs::read_to_string(file_path).expect("Failed to read file"); + + let expected: FinalQuery = expected_query(); + let parsed: FinalQuery = serde_json::from_str(&contents).expect("Json was not parsed"); + + assert_eq!(expected, parsed); + } + + #[test] + fn test_convert() { + let given_query: FinalQuery = expected_query(); + let text_analyzer_simple = TextAnalyzer::builder(SimpleTokenizer::default()).build(); + + let mut text_options_body = TEXT; + text_options_body = text_options_body | STORED; + text_options_body = text_options_body.set_indexing_options( + TextFieldIndexing::default() + .set_tokenizer("simple") + .set_index_option(IndexRecordOption::WithFreqsAndPositions), + ); + + let mut schema_builder = Schema::builder(); + schema_builder.add_text_field("body1", text_options_body.clone()); // Field(0) + schema_builder.add_text_field("body2", text_options_body.clone()); + schema_builder.add_text_field("body3", text_options_body.clone()); + schema_builder.add_text_field("title1", text_options_body.clone()); + schema_builder.add_text_field("title2", text_options_body.clone()); + schema_builder.add_text_field("title3", text_options_body); // Field(5) + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema.clone()); + index.tokenizers().register("simple", text_analyzer_simple); - assert_eq!(parsed, expected); + let parsed = convert_to_tantivy(&index, given_query, &schema).expect("can't convert"); + + let expected = expected_tantivy_query_str(); + + assert_eq!(expected, format!("{parsed:#?}")); + } + + fn expected_tantivy_query_str() -> &'static str { + r#"BooleanQuery { + subqueries: [ + ( + Must, + PhraseQuery { + field: Field( + 0, + ), + phrase_terms: [ + ( + 0, + Term(field=0, type=Str, "some"), + ), + ( + 1, + Term(field=0, type=Str, "words"), + ), + ], + slop: 0, + }, + ), + ( + Should, + PhrasePrefixQuery { + field: Field( + 1, + ), + phrase_terms: [], + prefix: ( + 0, + Term(field=1, type=Str, "term"), + ), + max_expansions: 50, + }, + ), + ( + MustNot, + PhrasePrefixQuery { + field: Field( + 2, + ), + phrase_terms: [], + prefix: ( + 0, + Term(field=2, type=Str, "term"), + ), + max_expansions: 50, + }, + ), + ( + Must, + Boost(query=PhraseQuery { field: Field(3), phrase_terms: [(0, Term(field=3, type=Str, "another")), (1, Term(field=3, type=Str, "term"))], slop: 0 }, boost=0.1), + ), + ( + Should, + Boost(query=PhrasePrefixQuery { field: Field(4), phrase_terms: [], prefix: (0, Term(field=4, type=Str, "term2")), max_expansions: 50 }, boost=0.1), + ), + ( + MustNot, + Boost(query=PhrasePrefixQuery { field: Field(5), phrase_terms: [], prefix: (0, Term(field=5, type=Str, "term2")), max_expansions: 50 }, boost=0.1), + ), + ( + Must, + BooleanQuery { + subqueries: [ + ( + Should, + PhrasePrefixQuery { + field: Field( + 0, + ), + phrase_terms: [ + ( + 0, + Term(field=0, type=Str, "some"), + ), + ], + prefix: ( + 1, + Term(field=0, type=Str, "words"), + ), + max_expansions: 50, + }, + ), + ( + Should, + BooleanQuery { + subqueries: [ + ( + Must, + Boost(query=PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, "some")), (1, Term(field=0, type=Str, "words"))], slop: 0 }, boost=0.8), + ), + ], + }, + ), + ], + }, + ), + ], +}"# + } } diff --git a/searchquerybuilder.go b/searchquerybuilder.go index 3903abc..7f68163 100644 --- a/searchquerybuilder.go +++ b/searchquerybuilder.go @@ -15,7 +15,7 @@ type QueryModifier int const ( Must QueryModifier = iota Should - ShouldNot + MustNot ) type FieldQuery struct { diff --git a/tantivy_test.go b/tantivy_test.go index 6da70e9..712c2d2 100644 --- a/tantivy_test.go +++ b/tantivy_test.go @@ -519,16 +519,16 @@ func Test(t *testing.T) { qb := tantivy_go.NewQueryBuilder() finalQuery := qb. - Query(tantivy_go.Must, "body1", "term", tantivy_go.PhraseQuery, 1.0). + Query(tantivy_go.Must, "body1", "some words", tantivy_go.PhraseQuery, 1.0). Query(tantivy_go.Should, "body2", "term", tantivy_go.PhrasePrefixQuery, 1.0). - Query(tantivy_go.ShouldNot, "body3", "term", tantivy_go.SingleTermPrefixQuery, 1.0). - Query(tantivy_go.Must, "title1", "term2", tantivy_go.PhraseQuery, 0.1). + Query(tantivy_go.MustNot, "body3", "term", tantivy_go.SingleTermPrefixQuery, 1.0). + Query(tantivy_go.Must, "title1", "another term", tantivy_go.PhraseQuery, 0.1). Query(tantivy_go.Should, "title2", "term2", tantivy_go.PhrasePrefixQuery, 0.1). - Query(tantivy_go.ShouldNot, "title3", "term2", tantivy_go.SingleTermPrefixQuery, 0.1). + Query(tantivy_go.MustNot, "title3", "term2", tantivy_go.SingleTermPrefixQuery, 0.1). BooleanQuery(tantivy_go.Must, tantivy_go.NewQueryBuilder(). Query(tantivy_go.Should, "summary", "term3", tantivy_go.PhrasePrefixQuery, 1.0). BooleanQuery(tantivy_go.Should, tantivy_go.NewQueryBuilder(). - Query(tantivy_go.Must, "comments", "term4", tantivy_go.PhraseQuery, 0.8), + Query(tantivy_go.Must, "comments", "not single term", tantivy_go.PhraseQuery, 0.8), ), ). Build() diff --git a/test_jsons/data.json b/test_jsons/data.json index f9153ce..4ebb772 100644 --- a/test_jsons/data.json +++ b/test_jsons/data.json @@ -1,106 +1,108 @@ { - "texts":[ - "term", - "term2" - ], - "fields":[ - "body1", - "body2", - "body3", - "title1", - "title2", - "title3" - ], - "query":{ - "subqueries":[ + "texts": [ + "some words", + "term", + "another term", + "term2" + ], + "fields": [ + "body1", + "body2", + "body3", + "title1", + "title2", + "title3" + ], + "query": { + "subqueries": [ + { + "query": { + "field_index": 0, + "text_index": 0, + "boost": 1 + }, + "query_modifier": 0, + "query_type": 1 + }, + { + "query": { + "field_index": 1, + "text_index": 1, + "boost": 1 + }, + "query_modifier": 1, + "query_type": 2 + }, + { + "query": { + "field_index": 2, + "text_index": 1, + "boost": 1 + }, + "query_modifier": 2, + "query_type": 3 + }, + { + "query": { + "field_index": 3, + "text_index": 2, + "boost": 0.1 + }, + "query_modifier": 0, + "query_type": 1 + }, + { + "query": { + "field_index": 4, + "text_index": 3, + "boost": 0.1 + }, + "query_modifier": 1, + "query_type": 2 + }, + { + "query": { + "field_index": 5, + "text_index": 3, + "boost": 0.1 + }, + "query_modifier": 2, + "query_type": 3 + }, + { + "query": { + "subqueries": [ { - "query":{ - "field_index":0, - "text_index":0, - "boost":1 - }, - "query_modifier":0, - "query_type":1 + "query": { + "field_index": 0, + "text_index": 0, + "boost": 1 + }, + "query_modifier": 1, + "query_type": 2 }, { - "query":{ - "field_index":1, - "text_index":0, - "boost":1 - }, - "query_modifier":1, - "query_type":2 - }, - { - "query":{ - "field_index":2, - "text_index":0, - "boost":1 - }, - "query_modifier":2, - "query_type":3 - }, - { - "query":{ - "field_index":3, - "text_index":1, - "boost":0.1 - }, - "query_modifier":0, - "query_type":1 - }, - { - "query":{ - "field_index":4, - "text_index":1, - "boost":0.1 - }, - "query_modifier":1, - "query_type":2 - }, - { - "query":{ - "field_index":5, - "text_index":1, - "boost":0.1 - }, - "query_modifier":2, - "query_type":3 - }, - { - "query":{ - "subqueries":[ - { - "query":{ - "field_index":0, - "text_index":0, - "boost":1 - }, - "query_modifier":1, - "query_type":2 - }, - { - "query":{ - "subqueries":[ - { - "query":{ - "field_index":0, - "text_index":0, - "boost":0.8 - }, - "query_modifier":0, - "query_type":1 - } - ] - }, - "query_modifier":1, - "query_type":0 - } - ] - }, - "query_modifier":0, - "query_type":0 + "query": { + "subqueries": [ + { + "query": { + "field_index": 0, + "text_index": 0, + "boost": 0.8 + }, + "query_modifier": 0, + "query_type": 1 + } + ] + }, + "query_modifier": 1, + "query_type": 0 } - ] - } + ] + }, + "query_modifier": 0, + "query_type": 0 + } + ] + } } \ No newline at end of file