Skip to content

Commit

Permalink
Changed fuzzy feature to strsim.
Browse files Browse the repository at this point in the history
  • Loading branch information
leontoeides committed Nov 15, 2023
1 parent e075ab3 commit 375a179
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ repository = "https://github.com/leontoeides/indicium"
maintenance = { status = "actively-developed" }

[features]
default = [ "simple", "ahash" ]
default = [ "simple", "strsim", "ahash" ]
simple = []
strsim = [ "dep:strsim" ]
ahash = [ "dep:ahash" ]
Expand Down
20 changes: 20 additions & 0 deletions src/simple/eddie_type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// -----------------------------------------------------------------------------
//
/// Selects one of the string similarity metrics implemented by Ilia
/// Schelokov's [eddie](https://crates.io/crates/eddie) crate.
///
/// The derived ordering (`Ord` / `PartialOrd`) follows the declaration order
/// of the variants below.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum EddieType {
    /// Levenshtein distance. See
    /// [the detailed description](https://en.wikipedia.org/wiki/Levenshtein_distance).
    Levenshtein,

    /// Damerau–Levenshtein distance. See
    /// [the detailed description](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance).
    DamerauLevenshtein,

    /// Hamming distance. See
    /// [the detailed description](https://en.wikipedia.org/wiki/Hamming_distance).
    Hamming,

    /// Jaro similarity. See
    /// [the detailed description](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance#Jaro_Similarity).
    Jaro,

    /// Jaro–Winkler similarity: like Jaro similarity, but strings that start
    /// with the same sequence of characters are given a higher score. See
    /// [the detailed description](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance#Jaro%E2%80%93Winkler_Similarity).
    JaroWinkler,
} // EddieType
2 changes: 2 additions & 0 deletions src/simple/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ mod default;
mod deref;
mod deref_mut;
mod dump_keyword;
mod eddie_type;
mod indexable;
mod insert;
mod max_keys_per_keyword;
Expand All @@ -35,6 +36,7 @@ mod profile;

pub use crate::simple::autocomplete_type::AutocompleteType;
pub use crate::simple::builder::SearchIndexBuilder;
pub use crate::simple::eddie_type::EddieType;
pub use crate::simple::indexable::Indexable;
pub use crate::simple::search_index::SearchIndex;
pub use crate::simple::search_type::SearchType;
Expand Down
8 changes: 2 additions & 6 deletions src/simple/strsim_type.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
// -----------------------------------------------------------------------------
//
/// The `StrSimType` string similarity type is used to select a string
/// similarity metric implemented by the `strsim` crate. This allows fuzzy
/// searching.
///
/// Indicium relies on Danny Guo's [strsim](https://crates.io/crates/strsim)
/// string similarity crate for fuzzy matching.
/// This is used to select a string similarity metric implemented by
/// Danny Guo's [strsim](https://crates.io/crates/strsim) crate.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum StrSimType {
Expand Down
17 changes: 17 additions & 0 deletions src/simple/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ fn simple() {
assert_eq!(search_results, vec![&1]);

// Ensure that fuzzy matching is working with live searches:
#[cfg(feature = "strsim")]
let search_results = search_index.search_type(&SearchType::Live, "1066 Harry");
#[cfg(feature = "strsim")]
assert_eq!(search_results, vec![&0]);

let autocomplete_options = search_index.autocomplete_type(&AutocompleteType::Keyword, "E");
Expand All @@ -128,7 +130,9 @@ fn simple() {
assert_eq!(autocomplete_options, vec!["1100 edgar".to_string(), "1100 edgar ætheling".to_string(), "1100 england".to_string()]);

// Test fuzzy-matching for global autocompletion:
#[cfg(feature = "strsim")]
let autocomplete_options = search_index.autocomplete_type(&AutocompleteType::Global, "1100 Englelund");
#[cfg(feature = "strsim")]
assert_eq!(autocomplete_options, vec!["1100 england".to_string()]);

// The only `w` keywords that `1087` should contain are `William` and
Expand All @@ -138,7 +142,9 @@ fn simple() {
assert_eq!(autocomplete_options, vec!["1087 william".to_string(), "1087 william rufus".to_string()]);

// Test fuzzy-matching for context autocompletion:
#[cfg(feature = "strsim")]
let autocomplete_options = search_index.autocomplete_type(&AutocompleteType::Context, "1087 Willy");
#[cfg(feature = "strsim")]
assert_eq!(autocomplete_options, vec!["1087 william".to_string(), "1087 william rufus".to_string()]);

// Ensure that `Context` autocomplete works with an empty search string /
Expand All @@ -148,12 +154,17 @@ fn simple() {
assert_eq!(autocomplete_options, vec!["1087".to_string()]);

// Test internal global fuzzy keyword search interface:
#[cfg(feature = "strsim")]
let similar_keyword = search_index.strsim_global_keyword(&"Willy".to_lowercase());
#[cfg(feature = "strsim")]
assert_eq!(similar_keyword, Some(&KString::from_ref("william")));

// Test internal global fuzzy autocompletion interface:
#[cfg(feature = "strsim")]
let similar_autocompletions = search_index.strsim_global_autocomplete(&"Normy".to_lowercase());
#[cfg(feature = "strsim")]
let similar_autocompletions_vec: Vec<&KString> = similar_autocompletions.into_iter().map(|(keyword, _keys)| keyword).collect();
#[cfg(feature = "strsim")]
assert_eq!(similar_autocompletions_vec, vec![&"norman".to_string()]);

// Test `Indexable` trait implementation for `ToString` generics:
Expand Down Expand Up @@ -206,11 +217,15 @@ fn simple() {
assert_eq!(search_results, vec![&13, &17]);

// Fuzzy matching:
#[cfg(feature = "strsim")]
let search_results = search_index.search_type(&SearchType::Live, "rivers");
#[cfg(feature = "strsim")]
assert_eq!(search_results, vec![&19]);

// Fuzzy matching:
#[cfg(feature = "strsim")]
let search_results = search_index.search_type(&SearchType::Live, "peet of Annan");
#[cfg(feature = "strsim")]
assert_eq!(search_results, vec![&3]);

// Keyword autocomplete:
Expand All @@ -226,7 +241,9 @@ fn simple() {
assert_eq!(autocomplete_options, vec!["krammer lock".to_string()]);

// Fuzzy matching context autocomplete:
#[cfg(feature = "strsim")]
let autocomplete_options = search_index.autocomplete_type(&AutocompleteType::Context, "stars are dancers");
#[cfg(feature = "strsim")]
assert_eq!(autocomplete_options, vec!["stars are dancing".to_string()]);

} // fn

0 comments on commit 375a179

Please sign in to comment.