quickwit-oss · AliFlux · Jul 11, 2022 · Jul 12, 2022 · Jul 13, 2022 · Jul 13, 2022
diff --git a/4.0 b/4.0
@@ -0,0 +1,26 @@
+Collecting pytest
+  Downloading pytest-7.1.2-py3-none-any.whl (297 kB)
+Requirement already satisfied: colorama in c:\users\alphaceph\anaconda3\envs\py310\lib\site-packages (from pytest) (0.4.4)
+Collecting attrs>=19.2.0
+  Downloading attrs-21.4.0-py2.py3-none-any.whl (60 kB)
+Collecting py>=1.8.2
+  Downloading py-1.11.0-py2.py3-none-any.whl (98 kB)
+Collecting atomicwrites>=1.0
+  Downloading atomicwrites-1.4.1.tar.gz (14 kB)
+Collecting pluggy<2.0,>=0.12
+  Downloading pluggy-1.0.0-py2.py3-none-any.whl (13 kB)
+Collecting packaging
+  Downloading packaging-21.3-py3-none-any.whl (40 kB)
+Collecting iniconfig
+  Downloading iniconfig-1.1.1-py2.py3-none-any.whl (5.0 kB)
+Requirement already satisfied: tomli>=1.0.0 in c:\users\alphaceph\anaconda3\envs\py310\lib\site-packages (from pytest) (2.0.1)
+Collecting pyparsing!=3.0.5,>=2.0.2
+  Downloading pyparsing-3.0.9-py3-none-any.whl (98 kB)
+Building wheels for collected packages: atomicwrites
+  Building wheel for atomicwrites (setup.py): started
+  Building wheel for atomicwrites (setup.py): finished with status 'done'
+  Created wheel for atomicwrites: filename=atomicwrites-1.4.1-py2.py3-none-any.whl size=6957 sha256=a1a268c4dc96c217af8ea7655cc187e388dcca401b511a0a00e532af25c25aee
+  Stored in directory: c:\users\alphaceph\appdata\local\pip\cache\wheels\34\07\0b\33b15f68736109f72ea0bb2499521d87312b932620737447a2
+Successfully built atomicwrites
+Installing collected packages: pyparsing, py, pluggy, packaging, iniconfig, attrs, atomicwrites, pytest
+Successfully installed atomicwrites-1.4.1 attrs-21.4.0 iniconfig-1.1.1 packaging-21.3 pluggy-1.0.0 py-1.11.0 pyparsing-3.0.9 pytest-7.1.2
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,8 @@
 [build-system]
-requires = ["maturin"]
+requires = ["maturin>=0.13,<0.14"]
 build-backend = "maturin"
 
 [project]
 name = "tantivy"
 requires-python = ">=3.7"
+
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1,2 +1,2 @@
-maturin
+maturin==0.13.0
 pytest>=4.0
diff --git a/src/facet.rs b/src/facet.rs
@@ -48,7 +48,7 @@ impl Facet {
     #[classmethod]
     fn from_string(_cls: &PyType, facet_string: &str) -> Facet {
         Facet {
-            inner: schema::Facet::from(facet_string),
+            inner: schema::Facet::from_text(facet_string).unwrap(), // NOTE(review): panics if `from_text` rejects `facet_string`
         }
     }
 

diff --git a/src/index.rs b/src/index.rs
@@ -314,10 +314,12 @@ impl Index {
     ///         field is specified in the query.
     ///
     #[args(reload_policy = "RELOAD_POLICY")]
+    #[args(conjunction_by_default = false)]
     pub fn parse_query(
         &self,
         query: &str,
         default_field_names: Option<Vec<String>>,
+        conjunction_by_default: bool,
     ) -> PyResult<Query> {
         let mut default_fields = vec![];
         let schema = self.index.schema();
@@ -344,12 +346,26 @@ impl Index {
         } else {
             for (field, field_entry) in self.index.schema().fields() {
                 if field_entry.is_indexed() {
-                    default_fields.push(field);
+
+                    match field_entry.field_type() {
+                        tv::schema::FieldType::Facet(_) => {
+                            // facets aren't suited for default fields
+                        },
+                        _ => {
+                            default_fields.push(field);
+                        },
+                    }
+
                 }
             }
         }
-        let parser =
+        let mut parser =
             tv::query::QueryParser::for_index(&self.index, default_fields);
+
+        if conjunction_by_default {
+            parser.set_conjunction_by_default();
+        }
+
         let query = parser.parse_query(query).map_err(to_pyerr)?;
 
         Ok(Query { inner: query })

diff --git a/src/schemabuilder.rs b/src/schemabuilder.rs
@@ -2,7 +2,7 @@
 
 use pyo3::{exceptions, prelude::*};
 
-use tantivy::schema;
+use tantivy::schema::{self, FacetOptions};
 
 use crate::schema::Schema;
 use std::sync::{Arc, RwLock};
@@ -131,6 +131,50 @@ impl SchemaBuilder {
         Ok(self.clone())
     }
 
+    /// Add a new float64 field to the schema.
+    /// Note: when adding a value to the index, make sure it is cast to
+    /// float; adding integers or other values may produce wrong results.
+    ///
+    /// Args:
+    ///     name (str): The name of the field.
+    ///     stored (bool, optional): If true sets the field as stored, the
+    ///         content of the field can be later restored from a Searcher.
+    ///         Defaults to False.
+    ///     indexed (bool, optional): If true, index the field. Defaults to False.
+    ///     fast (str, optional): Set the f64 options as a fast field. Fast
+    ///         fields are designed for random access; access times are
+    ///         similar to a random lookup in an array. Can be one of
+    ///         'single' or 'multi' (case-insensitive). With 'single', the
+    ///         document must have exactly one value for this field; if more
+    ///         than one is given, only the last one is kept. With 'multi',
+    ///         the document can have any number of values. Defaults to
+    ///         None, which disables this option.
+    ///
+    /// Returns a clone of this schema builder.
+    /// Raises a ValueError if `fast` is not one of the accepted choices or
+    /// if the schema builder object isn't valid anymore.
+    #[args(stored = false, indexed = false)]
+    fn add_float_field(
+        &mut self,
+        name: &str,
+        stored: bool,
+        indexed: bool,
+        fast: Option<&str>,
+    ) -> PyResult<Self> {
+        let builder = &mut self.builder;
+
+        let opts = SchemaBuilder::build_float_option(stored, indexed, fast)?;
+
+        if let Some(builder) = builder.write().unwrap().as_mut() {
+            builder.add_f64_field(name, opts);
+        } else {
+            return Err(exceptions::PyValueError::new_err(
+                "Schema builder object isn't valid anymore.",
+            ));
+        }
+        Ok(self.clone())
+    }
+
     /// Add a new unsigned integer field to the schema.
     ///
     /// Args:
@@ -267,11 +311,12 @@ impl SchemaBuilder {
     /// Add a Facet field to the schema.
     /// Args:
     ///     name (str): The name of the field.
+    #[args(stored = false, indexed = false)]
     fn add_facet_field(&mut self, name: &str) -> PyResult<Self> {
         let builder = &mut self.builder;
 
         if let Some(builder) = builder.write().unwrap().as_mut() {
-            builder.add_facet_field(name, INDEXED);
+            builder.add_facet_field(name, FacetOptions::default());
         } else {
             return Err(exceptions::PyValueError::new_err(
                 "Schema builder object isn't valid anymore.",
@@ -352,6 +397,39 @@ impl SchemaBuilder {
         Ok(opts)
     }
 
+    /// Build the `NumericOptions` used by `add_float_field`.
+    ///
+    /// `stored` / `indexed` toggle the matching flags. `fast` is compared
+    /// case-insensitively with "single" / "multi"; `None` leaves the field
+    /// non-fast and any other value raises a ValueError.
+    fn build_float_option(
+        stored: bool,
+        indexed: bool,
+        fast: Option<&str>,
+    ) -> PyResult<schema::NumericOptions> {
+        let opts = schema::NumericOptions::default();
+
+        let opts = if stored { opts.set_stored() } else { opts };
+        let opts = if indexed { opts.set_indexed() } else { opts };
+
+        let cardinality = match fast.map(str::to_lowercase).as_deref() {
+            Some("single") => Some(schema::Cardinality::SingleValue),
+            Some("multi") => Some(schema::Cardinality::MultiValues),
+            // The message must list the spellings accepted just above.
+            Some(_) => {
+                return Err(exceptions::PyValueError::new_err(
+                    "Invalid fast option, valid choices are: 'single' and 'multi'",
+                ))
+            }
+            None => None,
+        };
+
+        Ok(match cardinality {
+            Some(c) => opts.set_fast(c),
+            None => opts,
+        })
+    }
+
     fn build_text_option(
         stored: bool,
         tokenizer_name: &str,

diff --git a/src/searcher.rs b/src/searcher.rs
@@ -1,9 +1,13 @@
 #![allow(clippy::new_ret_no_self)]
 
+use std::collections::HashMap;
 use crate::{document::Document, get_field, query::Query, to_pyerr};
 use pyo3::{exceptions::PyValueError, prelude::*};
 use tantivy as tv;
 use tantivy::collector::{Count, MultiCollector, TopDocs};
+use tv::collector::{FacetCollector};
+use tv::fastfield::FastFieldReader;
+use tv::{SegmentReader, Score, DocId};
 
 /// Tantivy's Searcher class
 ///
@@ -41,10 +45,15 @@ impl ToPyObject for Fruit {
 /// Object holding a results successful search.
 pub(crate) struct SearchResult {
     hits: Vec<(Fruit, DocAddress)>,
+
     #[pyo3(get)]
     /// How many documents matched the query. Only available if `count` was set
     /// to true during the search.
     count: Option<usize>,
+
+    #[pyo3(get)]
+    /// Facet counts per facet path; only set when `count_facets_by_field` is given.
+    facet_counts: Option<HashMap<String, u64>>,
 }
 
 #[pymethods]
@@ -83,10 +92,15 @@ impl Searcher {
     ///         return. Defaults to 10.
     ///     count (bool, optional): Should the number of documents that match
     ///         the query be returned as well. Defaults to true.
+    ///     count_facets_by_field (str, optional): Name of a facet field to
+    ///         count matching documents by; no facet counts when None.
     ///     order_by_field (Field, optional): A schema field that the results
     ///         should be ordered by. The field must be declared as a fast field
     ///         when building the schema. Note, this only works for unsigned
     ///         fields.
+    ///     weight_by_field (str, optional): Name of an f64 fast field whose
+    ///         value is added to each document's score. When given, it
+    ///         takes precedence over `order_by_field`.
     ///     offset (Field, optional): The offset from which the results have
     ///         to be returned.
     ///
@@ -100,7 +114,9 @@ impl Searcher {
         query: &Query,
         limit: usize,
         count: bool,
+        count_facets_by_field: Option<&str>,
         order_by_field: Option<&str>,
+        weight_by_field: Option<&str>,
         offset: usize,
     ) -> PyResult<SearchResult> {
         let mut multicollector = MultiCollector::new();
@@ -111,8 +127,50 @@ impl Searcher {
             None
         };
 
+        let facet_handle = if let Some(facet_name) = count_facets_by_field {
+            let field = get_field(&self.inner.index().schema(), facet_name)?;
+            let mut facet_collector = FacetCollector::for_field(field);
+            facet_collector.add_facet("/");
+            Some(multicollector.add_collector(facet_collector))
+        } else {
+            None
+        };
+
         let (mut multifruit, hits) = {
-            if let Some(order_by) = order_by_field {
+
+            if let Some(weight_by) = weight_by_field {
+
+                let field = get_field(&self.inner.index().schema(), weight_by)?;
+                let collector = TopDocs::with_limit(limit)
+                    .and_offset(offset)
+                    .tweak_score(move |segment_reader: &SegmentReader| {
+                        let weight_reader = segment_reader.fast_fields().f64(field).unwrap();
+                        return move |doc: DocId, original_score: Score| {
+                            let weight: f64 = weight_reader.get(doc);
+                            let new_score = original_score + weight as f32;
+                            return new_score
+                        }
+                    });
+
+                let top_docs_handle = multicollector.add_collector(collector);
+                let ret = self.inner.search(query.get(), &multicollector);
+
+                match ret {
+                    Ok(mut r) => {
+                        let top_docs = top_docs_handle.extract(&mut r);
+                        let result: Vec<(Fruit, DocAddress)> = top_docs
+                            .iter()
+                            .map(|(f, d)| {
+                                (Fruit::Score(*f), DocAddress::from(d))
+                            })
+                            .collect();
+                        (r, result)
+                    }
+                    Err(e) => return Err(PyValueError::new_err(e.to_string())),
+                }
+
+            } else if let Some(order_by) = order_by_field {
+
                 let field = get_field(&self.inner.index().schema(), order_by)?;
                 let collector = TopDocs::with_limit(limit)
                     .and_offset(offset)
@@ -159,7 +217,26 @@ impl Searcher {
             None => None,
         };
 
-        Ok(SearchResult { hits, count })
+        let facet_counts:Option<HashMap<String, u64>> = match facet_handle {
+            Some(h) => {
+                let facet_counts_obj = h.extract(&mut multifruit);
+
+                let collection: Vec<(&tv::schema::Facet, u64)> = facet_counts_obj
+                    .get("/")
+                    .collect();
+
+                let mut facet_counts:HashMap<String, u64> = HashMap::new();
+
+                for (facet, count) in collection.iter() {
+                    facet_counts.insert(facet.to_path_string(), *count);
+                }
+
+                Some(facet_counts)
+            },
+            None => None,
+        };
+
+        Ok(SearchResult { hits, count, facet_counts})
     }
 
     /// Returns the overall number of documents in the index.