From f600bf4b1d4672cf83319d54557de40a3646ad00 Mon Sep 17 00:00:00 2001
From: Mikhail Iudin
Date: Sat, 23 Nov 2024 01:01:11 +0100
Subject: [PATCH] GO-4472 try to use api

---
 bindings.h              |   6 +
 go.mod                  |   8 +-
 go.sum                  |  16 +-
 rust/src/c_util/mod.rs  |   1 +
 rust/src/c_util/util.rs | 465 +++++++++++++++++++++++++++++++++++++++-
 rust/src/lib.rs         |  28 ++-
 searchquerybuilder.go   | 123 +++++++++++
 tantivy_test.go         |  71 ++++++
 tantivycontext.go       |  33 +++
 9 files changed, 733 insertions(+), 18 deletions(-)
 create mode 100644 searchquerybuilder.go

diff --git a/bindings.h b/bindings.h
index 20b7cae..a754dfd 100644
--- a/bindings.h
+++ b/bindings.h
@@ -80,6 +80,12 @@ struct SearchResult *context_search(struct TantivyContext *context_ptr,
                                     uintptr_t docs_limit,
                                     bool with_highlights);
 
+struct SearchResult *context_search2(struct TantivyContext *context_ptr,
+                                     const char *query_ptr,
+                                     char **error_buffer,
+                                     uintptr_t docs_limit,
+                                     bool with_highlights);
+
 void context_free(struct TantivyContext *context_ptr);
 
 uintptr_t search_result_get_size(struct SearchResult *result_ptr, char **error_buffer);
diff --git a/go.mod b/go.mod
index 6a5ddf9..039a1e9 100644
--- a/go.mod
+++ b/go.mod
@@ -4,14 +4,14 @@ go 1.22.0
 
 require (
 	github.com/stretchr/testify v1.9.0
-	golang.org/x/mobile v0.0.0-20241016134751-7ff83004ec2c
+	golang.org/x/mobile v0.0.0-20241108191957-fa514ef75a0f
 )
 
 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
-	golang.org/x/mod v0.21.0 // indirect
-	golang.org/x/sync v0.8.0 // indirect
-	golang.org/x/tools v0.26.0 // indirect
+	golang.org/x/mod v0.22.0 // indirect
+	golang.org/x/sync v0.9.0 // indirect
+	golang.org/x/tools v0.27.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/go.sum b/go.sum
index a012e69..66134c6 100644
--- a/go.sum
+++ b/go.sum
@@ -4,14 +4,14 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
 github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
-golang.org/x/mobile v0.0.0-20241016134751-7ff83004ec2c h1:zuNS/LWsEpPTLfrmBkis6Xofw3nieAqB4hYLn8+uswk=
-golang.org/x/mobile v0.0.0-20241016134751-7ff83004ec2c/go.mod h1:snk1Mn2ZpdKCt90JPEsDh4sL3ReK520U2t0d7RHBnSU=
-golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0=
-golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
-golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
-golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
-golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
-golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
+golang.org/x/mobile v0.0.0-20241108191957-fa514ef75a0f h1:23H/YlmTHfmmvpZ+ajKZL0qLz0+IwFOIqQA0mQbmLeM=
+golang.org/x/mobile v0.0.0-20241108191957-fa514ef75a0f/go.mod h1:UbSUP4uu/C9hw9R2CkojhXlAxvayHjBdU9aRvE+c1To=
+golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4=
+golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
+golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
+golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/tools v0.27.0 h1:qEKojBykQkQ4EynWy4S8Weg69NumxKdn40Fce3uc/8o=
+golang.org/x/tools v0.27.0/go.mod h1:sUi0ZgbwW9ZPAq26Ekut+weQPR5eIM6GQLQ1Yjm1H0Q=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
diff --git a/rust/src/c_util/mod.rs b/rust/src/c_util/mod.rs
index e21cd66..ef79a05 100644
--- a/rust/src/c_util/mod.rs
+++ b/rust/src/c_util/mod.rs
@@ -12,6 +12,7 @@ pub use self::util::add_and_consume_documents;
 pub use self::util::delete_docs;
 pub use self::util::create_context_with_schema;
 pub use self::util::search;
+pub use self::util::search2;
 pub use self::util::drop_any;
 pub use self::util::get_doc;
 pub use self::util::add_field;
\ No newline at end of file
diff --git a/rust/src/c_util/util.rs b/rust/src/c_util/util.rs
index b02337c..08db6d8 100644
--- a/rust/src/c_util/util.rs
+++ b/rust/src/c_util/util.rs
@@ -1,16 +1,21 @@
-use std::{fs, panic, slice};
+use std::{fmt, fs, panic, slice};
 use std::borrow::Cow;
 use std::collections::HashMap;
+use std::error::Error;
 use std::ffi::{CStr, CString};
 use std::os::raw::{c_char, c_float};
 use std::panic::PanicInfo;
 use std::path::Path;
 use log::debug;
+use serde::{de, Deserialize, Deserializer, Serialize};
+use serde::de::Visitor;
 use serde_json::json;
 use tantivy::{Index, IndexWriter, Score, TantivyDocument, TantivyError, Term};
 use tantivy::directory::MmapDirectory;
-use tantivy::query::{QueryParser};
+use tantivy::query::{BooleanQuery, Occur, PhraseQuery, Query, QueryParser};
+use tantivy::query_grammar::parse_query;
 use tantivy::schema::{Field, Schema};
+use crate::c_util::util::GoQuery::PhrasePrefixQuery;
 use crate::config;
 use crate::tantivy_util::{convert_document_to_json, Document, TantivyContext, DOCUMENT_BUDGET_BYTES, find_highlights, get_string_field_entry, SearchResult};
 
@@ -497,13 +502,463 @@ pub fn search(
         };
     }
 
-    let len = documents.len();
+    let size = documents.len();
     Ok(Box::into_raw(Box::new(SearchResult {
-        documents: documents,
-        size: len,
+        documents,
+        size,
     })))
 }
 
+pub fn search2(
+    query_ptr: *const c_char,
+    error_buffer: *mut *mut c_char,
+    docs_limit: usize,
+    context: &mut TantivyContext,
+    with_highlights: bool,
+) -> Result<*mut SearchResult, Box<dyn Error>> {
+    let searcher = &context.reader().searcher();
+    let schema = context.index.schema();
+
+    let query = match assert_string(query_ptr, error_buffer) {
+        Some(value) => value,
+        None => return Err(Box::new(fmt::Error))
+    };
+    debug!("search2: raw query json {:?}", query);
+
+    let query = parse_query_from_json(&context.index, &schema, &query)?;
+    debug!("search2: parsed query {:?}", query);
+
+    let top_docs = match searcher.search(
+        &query,
+        &tantivy::collector::TopDocs::with_limit(docs_limit),
+    ) {
+        Ok(top_docs) => top_docs,
+        Err(err) => {
+            set_error(&err.to_string(), error_buffer);
+            return Err(Box::new(fmt::Error));
+        }
+    };
+
+    let mut documents = Vec::new();
+    for (score, doc_address) in top_docs {
+        match searcher.doc::<TantivyDocument>(doc_address) {
+            Ok(doc) => {
+                let highlights = match find_highlights(
+                    with_highlights, &searcher, &query, &doc, schema.clone()) {
+                    Ok(highlights) => highlights,
+                    Err(err) => {
+                        set_error(&err.to_string(), error_buffer);
+                        return Err(Box::new(fmt::Error));
+                    }
+                };
+                documents.push(Document {
+                    tantivy_doc: doc,
+                    highlights,
+                    score,
+                });
+            }
+
+            Err(err) => {
+                set_error(&err.to_string(), error_buffer);
+                return Err(Box::new(fmt::Error));
+            }
+        };
+    }
+
+    let size = documents.len();
+    Ok(Box::into_raw(Box::new(SearchResult {
+        documents,
+        size,
+    })))
+}
+
+fn parse_query_from_json(
+    index: &Index,
+    schema: &Schema,
+    json: &str) -> Result<Box<dyn Query>, Box<dyn Error>> {
+    debug!("parse_query_from_json: raw {:?}", json);
+    let parsed = serde_json::from_str(json)?;
+    debug!("parse_query_from_json: parsed {:?}", parsed);
+    convert_to_tantivy(index, parsed, schema)
+}
+
+// Convert a `QueryModifier` to Tantivy's `Occur`
+fn modifier_to_occur(modifier: &QueryModifier) -> Occur {
+    match modifier {
+        QueryModifier::Must => Occur::Must,
+        QueryModifier::Should => Occur::Should,
+        QueryModifier::MustNot => Occur::MustNot,
+    }
+}
+
+// Main conversion function
+fn convert_to_tantivy(
+    index: &Index,
+    parsed: FinalQuery,
+    schema: &Schema,
+) -> Result<Box<dyn Query>, Box<dyn Error>> {
+    // The shared text and field tables must not be empty
+    if parsed.fields.is_empty() || parsed.texts.is_empty() {
+        return Err("Fields or texts cannot be empty".into());
+    }
+
+    // Recursive function to convert a `QueryElement` to a Tantivy query
+    fn element_to_query(
+        index: &Index,
+        element: &QueryElement,
+        schema: &Schema,
+        texts: &[String],
+        fields: &[String],
+    ) -> Result<(Occur, Box<dyn Query>), Box<dyn Error>> {
+
+        let occur = modifier_to_occur(&element.modifier);
+
+        if let Some(go_query) = &element.query {
+            match go_query {
+                GoQuery::PhraseQuery {
+                    field_index,
+                    text_index,
+                    boost: _,
+                } => {
+                    let field = fields.get(*field_index)
+                        .ok_or("Invalid field index in PhraseQuery")?;
+                    let text = texts.get(*text_index)
+                        .ok_or("Invalid text index in PhraseQuery")?;
+                    let field = schema.get_field(field)
+                        .or(Err("Invalid field name"))?;
+
+                    let terms = extract_terms(&index, field, text)?;
+                    let phrase_query = PhraseQuery::new(terms);
+                    Ok((occur, Box::new(phrase_query)))
+                }
+
+                GoQuery::PhrasePrefixQuery {
+                    field_index,
+                    text_index,
+                    boost: _,
+                } => {
+                    let field = fields.get(*field_index)
+                        .ok_or("Invalid field index in PhrasePrefixQuery")?;
+                    let text = texts.get(*text_index)
+                        .ok_or("Invalid text index in PhrasePrefixQuery")?;
+                    let field = schema.get_field(field)
+                        .or(Err("Invalid field name"))?;
+
+                    let terms = extract_terms(&index, field, text)?;
+                    let phrase_query = tantivy::query::PhrasePrefixQuery::new(terms);
+                    Ok((occur, Box::new(phrase_query)))
+                }
+
+                GoQuery::SingleTermPrefixQuery {
+                    field_index,
+                    text_index,
+                    boost: _,
+                } => {
+                    let field = fields.get(*field_index)
+                        .ok_or("Invalid field index in SingleTermPrefixQuery")?;
+                    let text = texts.get(*text_index)
+                        .ok_or("Invalid text index in SingleTermPrefixQuery")?;
+                    let field = schema.get_field(field)
+                        .or(Err("Invalid field name"))?;
+
+                    let terms = extract_terms(&index, field, text)?;
+                    let first_term = terms.first()
+                        .ok_or("Empty token stream in SingleTermPrefixQuery")?
+                        .clone();
+                    let prefix_query = tantivy::query::PhrasePrefixQuery::new(vec![first_term]);
+                    Ok((occur, Box::new(prefix_query)))
+                }
+
+                GoQuery::BoolQuery { subqueries } => {
+                    let mut sub_queries = vec![];
+                    for subquery in subqueries {
+                        sub_queries.push(element_to_query(index, subquery, schema, texts, fields)?);
+                    }
+                    let bool_query = BooleanQuery::from(sub_queries);
+                    Ok((occur, Box::new(bool_query)))
+                }
+                _ => Err("Unsupported GoQuery variant".into()),
+            }
+        } else {
+            Err("Query is None in QueryElement".into())
+        }
+    }
+
+    fn extract_terms(
+        index: &Index,
+        field: Field,
+        query: &str
+    ) -> Result<Vec<Term>, Box<dyn Error>> {
+        let mut tokenizer = index.tokenizer_for_field(field)?;
+        let mut token_stream = tokenizer.token_stream(query);
+        let mut terms = Vec::new();
+        while token_stream.advance() {
+            terms.push(token_stream.token().text.clone());
+        }
+        let term_queries: Vec<Term> = terms
+            .iter()
+            .map(|term| Term::from_field_text(field, term))
+            .collect();
+        Ok(term_queries)
+    }
+
+    // Convert the top-level BoolQuery
+    let mut sub_queries = vec![];
+    for subquery in &parsed.query.subqueries {
+        sub_queries.push(element_to_query(
+            index,
+            subquery,
+            schema,
+            &parsed.texts,
+            &parsed.fields,
+        )?);
+    }
+
+    let bool_query = BooleanQuery::from(sub_queries);
+    Ok(Box::new(bool_query))
+}
+
+#[derive(Serialize, Debug)]
+#[serde(rename_all = "snake_case")]
+pub enum QueryType {
+    BoolQuery,
+    PhraseQuery,
+    PhrasePrefixQuery,
+    SingleTermPrefixQuery,
+    None,
+}
+
+impl QueryType {
+    fn from_u64(value: u64) -> Option<QueryType> {
+        match value {
+            0 => Some(QueryType::BoolQuery),
+            1 => Some(QueryType::PhraseQuery),
+            2 => Some(QueryType::PhrasePrefixQuery),
+            3 => Some(QueryType::SingleTermPrefixQuery),
+            _ => None,
+        }
+    }
+}
+
+impl<'de> Deserialize<'de> for QueryType {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        struct QueryTypeVisitor;
+
+        impl<'de> Visitor<'de> for QueryTypeVisitor {
+            type Value = QueryType;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                write!(formatter, "a number representing the QueryType")
+            }
+
+            fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
+            where
+                E: de::Error,
+            {
+                QueryType::from_u64(value).ok_or_else(|| E::invalid_value(de::Unexpected::Unsigned(value), &self))
+            }
+        }
+
+        deserializer.deserialize_u64(QueryTypeVisitor)
+    }
+}
+
+impl<'de> Deserialize<'de> for QueryModifier {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        struct QueryModifierVisitor;
+
+        impl<'de> Visitor<'de> for QueryModifierVisitor {
+            type Value = QueryModifier;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                write!(formatter, "a number representing the QueryModifier")
+            }
+
+            fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
+            where
+                E: de::Error,
+            {
+                QueryModifier::from_u64(value).ok_or_else(|| E::invalid_value(de::Unexpected::Unsigned(value), &self))
+            }
+        }
+
+        deserializer.deserialize_u64(QueryModifierVisitor)
+    }
+}
+
+#[derive(Serialize, Debug)]
+#[serde(rename_all = "snake_case")]
+pub enum QueryModifier {
+    Must,
+    Should,
+    MustNot,
+}
+
+impl QueryModifier {
+    fn from_u64(val: u64) -> Option<QueryModifier> {
+        match val {
+            0 => Some(QueryModifier::Must),
+            1 => Some(QueryModifier::Should),
+            2 => Some(QueryModifier::MustNot),
+            _ => None
+        }
+    }
+}
+
+
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(rename_all = "snake_case")]
+pub enum GoQuery {
+    BoolQuery {
+        subqueries: Vec<QueryElement>,
+    },
+    PhraseQuery {
+        field_index: usize,
+        text_index: usize,
+        boost: f64,
+    },
+    PhrasePrefixQuery {
+        field_index: usize,
+        text_index: usize,
+        boost: f64,
+    },
+    SingleTermPrefixQuery {
+        field_index: usize,
+        text_index: usize,
+        boost: f64,
+    },
+}
+
+
+#[derive(Serialize, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct QueryElement {
+    pub query: Option<GoQuery>,
+    pub modifier: QueryModifier,
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct BoolQuery {
+    pub subqueries: Vec<QueryElement>,
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct FinalQuery {
+    pub texts: Vec<String>,
+    pub fields: Vec<String>,
+    pub query: BoolQuery,
+}
+
+impl<'de> Deserialize<'de> for QueryElement {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        // Deserialize into a generic JSON value first
+        let map: serde_json::Value = serde_json::Value::deserialize(deserializer)?;
+
+        // Deserialize the modifier as a QueryModifier
+        let modifier = map
+            .get("query_modifier")
+            .ok_or_else(|| serde::de::Error::missing_field("query_modifier"))?
+            .as_u64()
+            .and_then(QueryModifier::from_u64)
+            .ok_or_else(|| serde::de::Error::custom("Invalid query_modifier"))?;
+
+        // Deserialize query_type as a QueryType
+        let query_type = map
+            .get("query_type")
+            .ok_or_else(|| serde::de::Error::missing_field("query_type"))?
+            .as_u64()
+            .and_then(QueryType::from_u64)
+            .ok_or_else(|| serde::de::Error::custom("Invalid query_type"))?;
+
+        // Build the query payload according to query_type
+        let query = match query_type {
+            QueryType::BoolQuery => {
+                let subqueries = map
+                    .get("query")
+                    .and_then(|q| q.get("subqueries"))
+                    .ok_or_else(|| serde::de::Error::missing_field("subqueries"))?;
+                Some(GoQuery::BoolQuery {
+                    subqueries: serde_json::from_value(subqueries.clone())
+                        .map_err(serde::de::Error::custom)?,
+                })
+            }
+            QueryType::PhraseQuery => {
+                let query_data = map
+                    .get("query")
+                    .and_then(|q| q.as_object())
+                    .ok_or_else(|| serde::de::Error::missing_field("query"))?;
+                Some(GoQuery::PhraseQuery {
+                    field_index: query_data
+                        .get("field_index")
+                        .and_then(|v| v.as_u64())
+                        .unwrap_or(0) as usize,
+                    text_index: query_data
+                        .get("text_index")
+                        .and_then(|v| v.as_u64())
+                        .unwrap_or(0) as usize,
+                    boost: query_data
+                        .get("boost")
+                        .and_then(|v| v.as_f64())
+                        .unwrap_or(1.0),
+                })
+            }
+            QueryType::PhrasePrefixQuery => {
+                let query_data = map
+                    .get("query")
+                    .and_then(|q| q.as_object())
+                    .ok_or_else(|| serde::de::Error::missing_field("query"))?;
+                Some(GoQuery::PhrasePrefixQuery {
+                    field_index: query_data
+                        .get("field_index")
+                        .and_then(|v| v.as_u64())
+                        .unwrap_or(0) as usize,
+                    text_index: query_data
+                        .get("text_index")
+                        .and_then(|v| v.as_u64())
+                        .unwrap_or(0) as usize,
+                    boost: query_data
+                        .get("boost")
+                        .and_then(|v| v.as_f64())
+                        .unwrap_or(1.0),
+                })
+            }
+            QueryType::SingleTermPrefixQuery => {
+                let query_data = map
+                    .get("query")
+                    .and_then(|q| q.as_object())
+                    .ok_or_else(|| serde::de::Error::missing_field("query"))?;
+                Some(GoQuery::SingleTermPrefixQuery {
+                    field_index: query_data
+                        .get("field_index")
+                        .and_then(|v| v.as_u64())
+                        .unwrap_or(0) as usize,
+                    text_index: query_data
+                        .get("text_index")
+                        .and_then(|v| v.as_u64())
+                        .unwrap_or(0) as usize,
+                    boost: query_data
+                        .get("boost")
+                        .and_then(|v| v.as_f64())
+                        .unwrap_or(1.0),
+                })
+            }
+            QueryType::None => None,
+        };
+
+        // Build and return the QueryElement
+        Ok(QueryElement { query, modifier })
+    }
+}
+
 pub fn drop_any<T>(ptr: *mut T) {
     if !ptr.is_null() {
         unsafe { drop(Box::from_raw(ptr)); }
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index 970f1bd..10ed319 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -4,7 +4,7 @@ use std::ptr;
 
 use logcall::logcall;
 use tantivy::{schema::*};
-use crate::c_util::{add_and_consume_documents, add_field, assert_pointer, assert_str, assert_string, box_from, convert_document_as_json, create_context_with_schema, delete_docs, drop_any, get_doc, search, set_error, start_lib_init};
+use crate::c_util::{add_and_consume_documents, add_field, assert_pointer, assert_str, assert_string, box_from, convert_document_as_json, create_context_with_schema, delete_docs, drop_any, get_doc, search, search2, set_error, start_lib_init};
 use crate::tantivy_util::{add_text_field, Document, register_edge_ngram_tokenizer, register_ngram_tokenizer, register_raw_tokenizer, register_simple_tokenizer, register_jieba_tokenizer, SearchResult, TantivyContext};
 
 mod tantivy_util;
@@ -311,6 +311,32 @@ pub extern "C" fn context_search(
     }
 }
 
+#[logcall]
+#[no_mangle]
+pub extern "C" fn context_search2(
+    context_ptr: *mut TantivyContext,
+    query_ptr: *const c_char,
+    error_buffer: *mut *mut c_char,
+    docs_limit: usize,
+    with_highlights: bool,
+) -> *mut SearchResult {
+    let context = match assert_pointer(context_ptr, error_buffer) {
+        Some(value) => value,
+        None => return ptr::null_mut()
+    };
+
+    match search2(
+        query_ptr,
+        error_buffer,
+        docs_limit,
+        context,
+        with_highlights,
+    ) {
+        Ok(value) => value,
+        Err(_) => return ptr::null_mut()
+    }
+}
+
 #[allow(clippy::not_unsafe_ptr_arg_deref)]
 #[logcall]
 #[no_mangle]
diff --git a/searchquerybuilder.go b/searchquerybuilder.go
new file mode 100644
index 0000000..520d303
--- /dev/null
+++ b/searchquerybuilder.go
@@ -0,0 +1,123 @@
+package tantivy_go
+
+type QueryType int
+
+const (
+	BoolQuery QueryType = iota
+	PhraseQuery
+	PhrasePrefixQuery
+	SingleTermPrefixQuery
+	None
+)
+
+type QueryModifier int
+
+const (
+	Must QueryModifier = iota
+	Should
+	MustNot
+)
+
+type FieldQuery struct {
+	FieldIndex int     `json:"field_index"`
+	TextIndex  int     `json:"text_index"`
+	Boost      float64 `json:"boost"`
+}
+
+type QueryElement struct {
+	Query     Query         `json:"query"`
+	Modifier  QueryModifier `json:"query_modifier"`
+	QueryType QueryType     `json:"query_type"`
+}
+
+type BooleanQuery struct {
+	Subqueries []QueryElement `json:"subqueries"`
+}
+
+type FinalQuery struct {
+	Texts  []string      `json:"texts"`
+	Fields []string      `json:"fields"`
+	Query  *BooleanQuery `json:"query"`
+}
+
+type QueryBuilder struct {
+	texts      map[string]int
+	fields     map[string]int
+	textList   []string
+	fieldList  []string
+	subqueries []QueryElement
+}
+
+func NewQueryBuilder() *QueryBuilder {
+	return &QueryBuilder{
+		texts:      make(map[string]int),
+		fields:     make(map[string]int),
+		textList:   []string{},
+		fieldList:  []string{},
+		subqueries: []QueryElement{},
+	}
+}
+
+func (qb *QueryBuilder) AddText(text string) int {
+	if idx, exists := qb.texts[text]; exists {
+		return idx
+	}
+	idx := len(qb.textList)
+	qb.texts[text] = idx
+	qb.textList = append(qb.textList, text)
+	return idx
+}
+
+func (qb *QueryBuilder) AddField(field string) int {
+	if idx, exists := qb.fields[field]; exists {
+		return idx
+	}
+	idx := len(qb.fieldList)
+	qb.fields[field] = idx
+	qb.fieldList = append(qb.fieldList, field)
+	return idx
+}
+
+func (qb *QueryBuilder) Query(field string, text string, queryType QueryType, boost float64, modifier QueryModifier) *QueryBuilder {
+	textIndex := qb.AddText(text)
+	fieldIndex := qb.AddField(field)
+	qb.subqueries = append(qb.subqueries, QueryElement{
+		Query: &FieldQuery{
+			FieldIndex: fieldIndex,
+			TextIndex:  textIndex,
+			Boost:      boost,
+		},
+		Modifier:  modifier,
+		QueryType: queryType,
+	})
+	return qb
+}
+
+func (qb *QueryBuilder) BooleanQuery(modifier QueryModifier, subqueryFn func(*QueryBuilder)) *QueryBuilder {
+	subBuilder := NewQueryBuilder()
+	subqueryFn(subBuilder)
+	qb.subqueries = append(qb.subqueries, QueryElement{
+		Query: &BooleanQuery{
+			Subqueries: subBuilder.subqueries,
+		},
+		Modifier:  modifier,
+		QueryType: BoolQuery,
+	})
+	return qb
+}
+
+func (qb *QueryBuilder) Build() FinalQuery {
+	return FinalQuery{
+		Texts:  qb.textList,
+		Fields: qb.fieldList,
+		Query: &BooleanQuery{
+			Subqueries: qb.subqueries,
+		},
+	}
+}
+
+type Query interface {
+	IsQuery()
+}
+
+func (fq *FieldQuery) IsQuery() {}
+
+func (bq *BooleanQuery) IsQuery() {}
diff --git a/tantivy_test.go b/tantivy_test.go
index 433b01b..5a23db6 100644
--- a/tantivy_test.go
+++ b/tantivy_test.go
@@ -2,6 +2,7 @@ package tantivy_go_test
 
 import (
 	"encoding/json"
+	"fmt"
 	"github.com/anyproto/tantivy-go/internal"
 	"os"
 	"testing"
@@ -498,6 +499,76 @@ func Test(t *testing.T) {
 		require.Equal(t, 2, int(size))
 	})
 
+	t.Run("docs search V2 - query builder", func(t *testing.T) {
+		schema, tc := fx(t, limit, 1, false, false)
+
+		defer tc.Free()
+
+		doc, err := addDoc(t, "an apple", "", "id1", tc)
+		require.NoError(t, err)
+
+		doc2, err := addDoc(t, "", "an apple", "id2", tc)
+		require.NoError(t, err)
+
+		err = tc.AddAndConsumeDocuments(doc, doc2)
+		require.NoError(t, err)
+
+		docs, err := tc.NumDocs()
+		require.NoError(t, err)
+		require.Equal(t, uint64(2), docs)
+
+		qb := tantivy_go.NewQueryBuilder()
+
+		finalQuery := qb.
+			Query("title", "hello world", tantivy_go.PhraseQuery, 2.0, tantivy_go.Must).
+			Query("body", "specific term", tantivy_go.PhrasePrefixQuery, 1.0, tantivy_go.Should).
+			BooleanQuery(tantivy_go.Must, func(sub *tantivy_go.QueryBuilder) {
+				sub.
+					Query("summary", "another term", tantivy_go.PhrasePrefixQuery, 1.5, tantivy_go.Should).
+					BooleanQuery(tantivy_go.Should, func(nested *tantivy_go.QueryBuilder) {
+						nested.Query("comments", "deep term", tantivy_go.PhraseQuery, 0.8, tantivy_go.Must)
+					})
+			}).Build()
+		marshal, _ := json.Marshal(finalQuery)
+		fmt.Printf("final query: %s\n", marshal)
+
+		sCtx := tantivy_go.NewSearchContextBuilder().
+			SetQuery(string(marshal)).
+			SetDocsLimit(100).
+			SetWithHighlights(false).
+			Build()
+		result, err := tc.SearchV2(sCtx)
+		require.NoError(t, err)
+
+		size, err := result.GetSize()
+		defer result.Free()
+		require.Equal(t, 2, int(size))
+		resDoc, err := result.Get(0)
+		require.NoError(t, err)
+		jsonStr, err := resDoc.ToJson(schema, NameId)
+		require.NoError(t, err)
+		require.JSONEq(t, `{"highlights":[],"id":"id1","score":1.9676434993743896}`, jsonStr)
+
+		sCtx2 := tantivy_go.NewSearchContextBuilder().
+			SetQuery("apple").
+			SetDocsLimit(100).
+			SetWithHighlights(false).
+			AddField(NameTitle, 1.0).
+			AddField(NameBody, 10.0).
+			Build()
+		result2, err := tc.Search(sCtx2)
+		require.NoError(t, err)
+
+		size2, err := result2.GetSize()
+		defer result2.Free()
+		require.Equal(t, 2, int(size2))
+		resDoc2, err := result2.Get(0)
+		require.NoError(t, err)
+		jsonStr2, err := resDoc2.ToJson(schema, NameId)
+		require.NoError(t, err)
+		require.JSONEq(t, `{"highlights":[],"id":"id2","score":4.919108867645264}`, jsonStr2)
+	})
+
 	t.Run("docs search - when weights apply", func(t *testing.T) {
 		schema, tc := fx(t, limit, 1, false, false)
 
diff --git a/tantivycontext.go b/tantivycontext.go
index 230cbc7..e181507 100644
--- a/tantivycontext.go
+++ b/tantivycontext.go
@@ -157,6 +157,39 @@ func (tc *TantivyContext) Search(sCtx SearchContext) (*SearchResult, error) {
 	return &SearchResult{ptr: ptr}, nil
 }
 
+// SearchV2 performs a search on the index using a JSON-serialized query built with QueryBuilder
+// and returns the search results.
+//
+// Parameters:
+//   - sCtx (SearchContext): The context for the search, containing the JSON query,
+//     document limit, and highlight option.
+//
+// Returns:
+//   - *SearchResult: A pointer to the SearchResult containing the search results.
+//   - error: An error if the search fails.
+func (tc *TantivyContext) SearchV2(sCtx SearchContext) (*SearchResult, error) {
+	// Convert the JSON query to a C string
+	cQuery := C.CString(sCtx.GetQuery())
+	defer C.string_free(cQuery)
+
+	// Prepare the error buffer
+	var errBuffer *C.char
+
+	// Call the C function
+	ptr := C.context_search2(
+		tc.ptr,
+		cQuery,
+		&errBuffer,
+		pointerCType(sCtx.GetDocsLimit()),
+		C.bool(sCtx.WithHighlights()),
+	)
+	if ptr == nil {
+		defer C.string_free(errBuffer)
+		return nil, errors.New(C.GoString(errBuffer))
+	}
+
+	return &SearchResult{ptr: ptr}, nil
+}
+
 func (tc *TantivyContext) Free() {
 	C.context_free(tc.ptr)
 }
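
Editor's note, not part of the patch: a minimal usage sketch of the API this change introduces, for review context. The import alias, module path, wrapper function and query values are assumptions for illustration only; field names follow the fixture used in tantivy_test.go, and the TantivyContext is assumed to be already initialized and populated.

package example

import (
	"encoding/json"

	tantivy_go "github.com/anyproto/tantivy-go" // assumed module path
)

// searchSketch builds a structured query with QueryBuilder, marshals it to JSON,
// and hands it to SearchV2, which forwards the JSON to context_search2 on the Rust side.
func searchSketch(tc *tantivy_go.TantivyContext) (*tantivy_go.SearchResult, error) {
	q := tantivy_go.NewQueryBuilder().
		Query("title", "an apple", tantivy_go.PhraseQuery, 1.0, tantivy_go.Must).
		Query("body", "an app", tantivy_go.PhrasePrefixQuery, 1.0, tantivy_go.Should).
		Build()

	// The marshaled FinalQuery is what parse_query_from_json expects in util.rs.
	payload, err := json.Marshal(q)
	if err != nil {
		return nil, err
	}

	sCtx := tantivy_go.NewSearchContextBuilder().
		SetQuery(string(payload)).
		SetDocsLimit(10).
		SetWithHighlights(false).
		Build()

	return tc.SearchV2(sCtx)
}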