GO-4472 try to use api
fat-fellow committed Nov 27, 2024
1 parent 326ea2e commit e56c858
Showing 4 changed files with 362 additions and 198 deletions.
346 changes: 254 additions & 92 deletions rust/src/queries/convert.rs
@@ -134,98 +134,260 @@ pub fn parse_query_from_json(
convert_to_tantivy(index, parsed, schema)
}

#[test]
fn test_file_reading() {
let file_path = "../test_jsons/data.json";
let contents = fs::read_to_string(file_path).expect("Failed to read file");

let expected: FinalQuery = FinalQuery {
texts: Vec::from(["term", "term2"].map(|t| t.to_string())),
fields: Vec::from(
["body1", "body2", "body3", "title1", "title2", "title3"].map(|t| t.to_string()),
),
query: BoolQuery {
subqueries: Vec::from([
QueryElement {
query: Some(GoQuery::PhraseQuery {
field_index: 0,
text_index: 0,
boost: 1.0,
}),
modifier: QueryModifier::Must,
},
QueryElement {
query: Some(GoQuery::PhrasePrefixQuery {
field_index: 1,
text_index: 0,
boost: 1.0,
}),
modifier: QueryModifier::Should,
},
QueryElement {
query: Some(GoQuery::SingleTermPrefixQuery {
field_index: 2,
text_index: 0,
boost: 1.0,
}),
modifier: QueryModifier::MustNot,
},
QueryElement {
query: Some(GoQuery::PhraseQuery {
field_index: 3,
text_index: 1,
boost: 0.1,
}),
modifier: QueryModifier::Must,
},
QueryElement {
query: Some(GoQuery::PhrasePrefixQuery {
field_index: 4,
text_index: 1,
boost: 0.1,
}),
modifier: QueryModifier::Should,
},
QueryElement {
query: Some(GoQuery::SingleTermPrefixQuery {
field_index: 5,
text_index: 1,
boost: 0.1,
}),
modifier: QueryModifier::MustNot,
},
QueryElement {
query: Some(GoQuery::BoolQuery {
subqueries: Vec::from([
QueryElement {
query: Some(GoQuery::PhrasePrefixQuery {
field_index: 0,
text_index: 0,
boost: 1.0,
}),
modifier: QueryModifier::Should,
},
QueryElement {
query: Some(GoQuery::BoolQuery {
subqueries: Vec::from([QueryElement {
query: Some(GoQuery::PhraseQuery {
field_index: 0,
text_index: 0,
boost: 0.8,
}),
modifier: QueryModifier::Must,
}]),
}),
modifier: QueryModifier::Should,
},
]),
}),
modifier: QueryModifier::Must,
},
]),
},
mod for_tests {
use crate::queries::GoQuery::BoolQuery;
use crate::queries::{FinalQuery, GoQuery, QueryElement, QueryModifier};
}

#[cfg(test)]
mod tests {
use crate::queries::convert::convert_to_tantivy;
use crate::queries::models::BoolQuery;
use crate::queries::{FinalQuery, GoQuery, QueryElement, QueryModifier};
use std::fs;
use tantivy::query::PhrasePrefixQuery;
use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, STORED, TEXT};
use tantivy::tokenizer::{
AsciiFoldingFilter, Language, LowerCaser, RemoveLongFilter, SimpleTokenizer, Stemmer,
TextAnalyzer,
};
let parsed: FinalQuery = serde_json::from_str(&contents).expect("Json was not parsed");
use tantivy::Index;

fn expected_query() -> FinalQuery {
FinalQuery {
texts: vec!["some words", "term", "another term", "term2"]
.into_iter()
.map(|t| t.to_string())
.collect(),
fields: vec!["body1", "body2", "body3", "title1", "title2", "title3"]
.into_iter()
.map(|t| t.to_string())
.collect(),
query: BoolQuery {
subqueries: Vec::from([
QueryElement {
query: Some(GoQuery::PhraseQuery {
field_index: 0,
text_index: 0,
boost: 1.0,
}),
modifier: QueryModifier::Must,
},
QueryElement {
query: Some(GoQuery::PhrasePrefixQuery {
field_index: 1,
text_index: 1,
boost: 1.0,
}),
modifier: QueryModifier::Should,
},
QueryElement {
query: Some(GoQuery::SingleTermPrefixQuery {
field_index: 2,
text_index: 1,
boost: 1.0,
}),
modifier: QueryModifier::MustNot,
},
QueryElement {
query: Some(GoQuery::PhraseQuery {
field_index: 3,
text_index: 2,
boost: 0.1,
}),
modifier: QueryModifier::Must,
},
QueryElement {
query: Some(GoQuery::PhrasePrefixQuery {
field_index: 4,
text_index: 3,
boost: 0.1,
}),
modifier: QueryModifier::Should,
},
QueryElement {
query: Some(GoQuery::SingleTermPrefixQuery {
field_index: 5,
text_index: 3,
boost: 0.1,
}),
modifier: QueryModifier::MustNot,
},
QueryElement {
query: Some(GoQuery::BoolQuery {
subqueries: Vec::from([
QueryElement {
query: Some(GoQuery::PhrasePrefixQuery {
field_index: 0,
text_index: 0,
boost: 1.0,
}),
modifier: QueryModifier::Should,
},
QueryElement {
query: Some(GoQuery::BoolQuery {
subqueries: Vec::from([QueryElement {
query: Some(GoQuery::PhraseQuery {
field_index: 0,
text_index: 0,
boost: 0.8,
}),
modifier: QueryModifier::Must,
}]),
}),
modifier: QueryModifier::Should,
},
]),
}),
modifier: QueryModifier::Must,
},
]),
},
}
}

#[test]
fn test_file_reading() {
let file_path = "../test_jsons/data.json";
let contents = fs::read_to_string(file_path).expect("Failed to read file");

let expected: FinalQuery = expected_query();
let parsed: FinalQuery = serde_json::from_str(&contents).expect("Json was not parsed");

assert_eq!(expected, parsed);
}

#[test]
fn test_convert() {
let given_query: FinalQuery = expected_query();
let text_analyzer_simple = TextAnalyzer::builder(SimpleTokenizer::default()).build();

let mut text_options_body = TEXT;
text_options_body = text_options_body | STORED;
text_options_body = text_options_body.set_indexing_options(
TextFieldIndexing::default()
.set_tokenizer("simple")
.set_index_option(IndexRecordOption::WithFreqsAndPositions),
);

let mut schema_builder = Schema::builder();
schema_builder.add_text_field("body1", text_options_body.clone()); // Field(0)
schema_builder.add_text_field("body2", text_options_body.clone());
schema_builder.add_text_field("body3", text_options_body.clone());
schema_builder.add_text_field("title1", text_options_body.clone());
schema_builder.add_text_field("title2", text_options_body.clone());
schema_builder.add_text_field("title3", text_options_body); // Field(5)
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
index.tokenizers().register("simple", text_analyzer_simple);

assert_eq!(parsed, expected);
let parsed = convert_to_tantivy(&index, given_query, &schema).expect("can't convert");

let expected = expected_tantivy_query_str();

assert_eq!(expected, format!("{parsed:#?}"));
}

fn expected_tantivy_query_str() -> &'static str {
r#"BooleanQuery {
subqueries: [
(
Must,
PhraseQuery {
field: Field(
0,
),
phrase_terms: [
(
0,
Term(field=0, type=Str, "some"),
),
(
1,
Term(field=0, type=Str, "words"),
),
],
slop: 0,
},
),
(
Should,
PhrasePrefixQuery {
field: Field(
1,
),
phrase_terms: [],
prefix: (
0,
Term(field=1, type=Str, "term"),
),
max_expansions: 50,
},
),
(
MustNot,
PhrasePrefixQuery {
field: Field(
2,
),
phrase_terms: [],
prefix: (
0,
Term(field=2, type=Str, "term"),
),
max_expansions: 50,
},
),
(
Must,
Boost(query=PhraseQuery { field: Field(3), phrase_terms: [(0, Term(field=3, type=Str, "another")), (1, Term(field=3, type=Str, "term"))], slop: 0 }, boost=0.1),
),
(
Should,
Boost(query=PhrasePrefixQuery { field: Field(4), phrase_terms: [], prefix: (0, Term(field=4, type=Str, "term2")), max_expansions: 50 }, boost=0.1),
),
(
MustNot,
Boost(query=PhrasePrefixQuery { field: Field(5), phrase_terms: [], prefix: (0, Term(field=5, type=Str, "term2")), max_expansions: 50 }, boost=0.1),
),
(
Must,
BooleanQuery {
subqueries: [
(
Should,
PhrasePrefixQuery {
field: Field(
0,
),
phrase_terms: [
(
0,
Term(field=0, type=Str, "some"),
),
],
prefix: (
1,
Term(field=0, type=Str, "words"),
),
max_expansions: 50,
},
),
(
Should,
BooleanQuery {
subqueries: [
(
Must,
Boost(query=PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, "some")), (1, Term(field=0, type=Str, "words"))], slop: 0 }, boost=0.8),
),
],
},
),
],
},
),
],
}"#
}
}
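For context, the pieces exercised by the new Rust tests can be strung together outside of #[cfg(test)]. The sketch below is a condensed, hypothetical driver assembled only from calls that already appear in this diff (fs/serde_json parsing into FinalQuery, the schema and "simple" tokenizer setup, and convert_to_tantivy); the function name, the field list, and the reuse of ../test_jsons/data.json are illustrative assumptions, not part of the commit.

// Hypothetical driver: parse a FinalQuery from JSON and convert it to a tantivy query.
// It mirrors test_file_reading and test_convert; names noted as illustrative in the
// text above are assumptions, not committed API.
use std::fs;

use crate::queries::convert::convert_to_tantivy;
use crate::queries::FinalQuery;
use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, STORED, TEXT};
use tantivy::tokenizer::{SimpleTokenizer, TextAnalyzer};
use tantivy::Index;

fn build_query_from_file() {
    // Read and deserialize the Go-produced query description.
    let contents = fs::read_to_string("../test_jsons/data.json").expect("Failed to read file");
    let query: FinalQuery = serde_json::from_str(&contents).expect("Json was not parsed");

    // Index every field referenced by the query with the "simple" tokenizer,
    // exactly as test_convert does.
    let text_options = (TEXT | STORED).set_indexing_options(
        TextFieldIndexing::default()
            .set_tokenizer("simple")
            .set_index_option(IndexRecordOption::WithFreqsAndPositions),
    );
    let mut schema_builder = Schema::builder();
    for field in ["body1", "body2", "body3", "title1", "title2", "title3"] {
        schema_builder.add_text_field(field, text_options.clone());
    }
    let schema = schema_builder.build();

    let index = Index::create_in_ram(schema.clone());
    index
        .tokenizers()
        .register("simple", TextAnalyzer::builder(SimpleTokenizer::default()).build());

    // Convert the intermediate representation into a tantivy query tree.
    let tantivy_query = convert_to_tantivy(&index, query, &schema).expect("can't convert");
    println!("{tantivy_query:#?}");
}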
2 changes: 1 addition & 1 deletion searchquerybuilder.go
@@ -15,7 +15,7 @@ type QueryModifier int
const (
Must QueryModifier = iota
Should
ShouldNot
MustNot
)

type FieldQuery struct {
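The rename from ShouldNot to MustNot brings the Go constant in line with the Rust QueryModifier::MustNot variant used throughout the new tests (under iota the ordering stays Must = 0, Should = 1, MustNot = 2). As a small illustration, this is how such an element looks in the Rust intermediate model; it is copied from the expected_query fixture above, with the helper name and index comments added here, not taken from the commit.

use crate::queries::{GoQuery, QueryElement, QueryModifier};

// A MustNot element as it appears in expected_query: exclude documents whose
// "body3" (field_index 2) starts with "term" (text_index 1).
fn must_not_example() -> QueryElement {
    QueryElement {
        query: Some(GoQuery::SingleTermPrefixQuery {
            field_index: 2,
            text_index: 1,
            boost: 1.0,
        }),
        modifier: QueryModifier::MustNot,
    }
}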
10 changes: 5 additions & 5 deletions tantivy_test.go
@@ -519,16 +519,16 @@ func Test(t *testing.T) {
qb := tantivy_go.NewQueryBuilder()

finalQuery := qb.
Query(tantivy_go.Must, "body1", "term", tantivy_go.PhraseQuery, 1.0).
Query(tantivy_go.Must, "body1", "some words", tantivy_go.PhraseQuery, 1.0).
Query(tantivy_go.Should, "body2", "term", tantivy_go.PhrasePrefixQuery, 1.0).
Query(tantivy_go.ShouldNot, "body3", "term", tantivy_go.SingleTermPrefixQuery, 1.0).
Query(tantivy_go.Must, "title1", "term2", tantivy_go.PhraseQuery, 0.1).
Query(tantivy_go.MustNot, "body3", "term", tantivy_go.SingleTermPrefixQuery, 1.0).
Query(tantivy_go.Must, "title1", "another term", tantivy_go.PhraseQuery, 0.1).
Query(tantivy_go.Should, "title2", "term2", tantivy_go.PhrasePrefixQuery, 0.1).
Query(tantivy_go.ShouldNot, "title3", "term2", tantivy_go.SingleTermPrefixQuery, 0.1).
Query(tantivy_go.MustNot, "title3", "term2", tantivy_go.SingleTermPrefixQuery, 0.1).
BooleanQuery(tantivy_go.Must, tantivy_go.NewQueryBuilder().
Query(tantivy_go.Should, "summary", "term3", tantivy_go.PhrasePrefixQuery, 1.0).
BooleanQuery(tantivy_go.Should, tantivy_go.NewQueryBuilder().
Query(tantivy_go.Must, "comments", "term4", tantivy_go.PhraseQuery, 0.8),
Query(tantivy_go.Must, "comments", "not single term", tantivy_go.PhraseQuery, 0.8),
),
).
Build()