Skip to content

Commit

Permalink
Corrected panic on UTF-8 searches.
Browse files Browse the repository at this point in the history
  • Loading branch information
leontoeides committed May 4, 2024
1 parent 3ffd699 commit 4ff7ebb
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 20 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
* Release notes are available on
[GitHub](https://github.com/leontoeides/indicium/releases).

* `0.6.2`: Corrected [panic on UTF-8 searches](https://github.com/leontoeides/indicium/issues/2).

* `0.6.1`: Removed `eddie` as the default string similarity crate, for now, due
to a potential `panic`.

Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "indicium"
version = "0.6.1"
version = "0.6.2"
authors = ["Dylan Bowker <[email protected]>"]
edition = "2021"
categories = [ "database-implementations" ]
Expand All @@ -17,7 +17,7 @@ rust-version = "1.62.1"
default = [ "simple", "strsim", "ahash" ]
simple = []
select2 = [ "simple", "serde" ]
fuzzy = [ "eddie" ] # Deprecated feature. Redirects to `eddie` feature.
fuzzy = [ "strsim" ] # Deprecated feature. Redirects to `strsim` feature.
ahash = [ "dep:ahash" ]
eddie = [ "dep:eddie" ]
gxhash = [ "dep:gxhash" ]
Expand Down
16 changes: 13 additions & 3 deletions src/simple/internal/eddie/eddie_context_autocomplete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,19 @@ impl<K: Hash + Ord> SearchIndex<K> {
// The user keyword must be at least as long as the match length to be
// evaluated for fuzzy-matches (note: `len()` counts bytes, not characters):
if user_keyword.len() >= self.fuzzy_length {
// Use the first _n_ characters of the user's keyword to find
// search index keywords to compare against:
&user_keyword[0..self.fuzzy_length]
// Get the byte index of the _n_th character:
let byte_index: Option<usize> = user_keyword
.char_indices()
.take(self.fuzzy_length)
.map(|(idx, _ch)| idx)
.max();
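// Use the first _n_ characters of the user's keyword. These
// first characters are used to find search index keywords to
// fuzzy match against.
// NOTE(review): `byte_index` is the byte where the _n_th character starts,
// so the slice below ends just before it and holds _n_ - 1 characters;
// confirm whether that is intended: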
match byte_index {
Some(byte_index) => &user_keyword[0..byte_index],
None => return vec![],
} // match
} else {
// The user's keyword is too short. Do not perform any fuzzy
// matching:
Expand Down
16 changes: 13 additions & 3 deletions src/simple/internal/eddie/eddie_global_autocomplete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,19 @@ impl<K: Hash + Ord> SearchIndex<K> {
// The user keyword must be at least as long as the match length to be
// evaluated for fuzzy-matches (note: `len()` counts bytes, not characters):
if user_keyword.len() >= self.fuzzy_length {
// Use the first _n_ characters of the user's keyword to find
// search index keywords to compare against:
&user_keyword[0..self.fuzzy_length]
// Get the byte index of the _n_th character:
let byte_index: Option<usize> = user_keyword
.char_indices()
.take(self.fuzzy_length)
.map(|(idx, _ch)| idx)
.max();
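// Use the first _n_ characters of the user's keyword. These
// first characters are used to find search index keywords to
// fuzzy match against.
// NOTE(review): `byte_index` is the byte where the _n_th character starts,
// so the slice below ends just before it and holds _n_ - 1 characters;
// confirm whether that is intended: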
match byte_index {
Some(byte_index) => &user_keyword[0..byte_index],
None => return vec![],
} // match
} else {
// The user's keyword is too short. Do not perform any fuzzy
// matching:
Expand Down
16 changes: 13 additions & 3 deletions src/simple/internal/eddie/eddie_global_keyword.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,19 @@ impl<K: Hash + Ord> SearchIndex<K> {
// The user keyword must be at least as long as the match length to be
// evaluated for fuzzy-matches (note: `len()` counts bytes, not characters):
if user_keyword.len() >= self.fuzzy_length {
// Use the first _n_ characters of the user's keyword to find
// search index keywords to compare against:
&user_keyword[0..self.fuzzy_length]
// Get the byte index of the _n_th character:
let byte_index: Option<usize> = user_keyword
.char_indices()
.take(self.fuzzy_length)
.map(|(idx, _ch)| idx)
.max();
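// Use the first _n_ characters of the user's keyword. These
// first characters are used to find search index keywords to
// fuzzy match against.
// NOTE(review): `byte_index` is the byte where the _n_th character starts,
// so the slice below ends just before it and holds _n_ - 1 characters;
// confirm whether that is intended: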
match byte_index {
Some(byte_index) => &user_keyword[0..byte_index],
None => return vec![],
} // match
} else {
// The user's keyword is too short. Do not perform any fuzzy
// matching:
Expand Down
16 changes: 13 additions & 3 deletions src/simple/internal/strsim/strsim_context_autocomplete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,19 @@ impl<K: Hash + Ord> SearchIndex<K> {
// The user keyword must be at least as long as the match length to be
// evaluated for fuzzy-matches (note: `len()` counts bytes, not characters):
if user_keyword.len() >= self.fuzzy_length {
// Use the first _n_ characters of the user's keyword to find
// search index keywords to compare against:
&user_keyword[0..self.fuzzy_length]
// Get the byte index of the _n_th character:
let byte_index: Option<usize> = user_keyword
.char_indices()
.take(self.fuzzy_length)
.map(|(idx, _ch)| idx)
.max();
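// Use the first _n_ characters of the user's keyword. These
// first characters are used to find search index keywords to
// fuzzy match against.
// NOTE(review): `byte_index` is the byte where the _n_th character starts,
// so the slice below ends just before it and holds _n_ - 1 characters;
// confirm whether that is intended: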
match byte_index {
Some(byte_index) => &user_keyword[0..byte_index],
None => return vec![],
} // match
} else {
// The user's keyword is too short. Do not perform any fuzzy
// matching:
Expand Down
16 changes: 13 additions & 3 deletions src/simple/internal/strsim/strsim_global_autocomplete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,19 @@ impl<K: Hash + Ord> SearchIndex<K> {
// The user keyword must be at least as long as the match length to be
// evaluated for fuzzy-matches (note: `len()` counts bytes, not characters):
if user_keyword.len() >= self.fuzzy_length {
// Use the first _n_ characters of the user's keyword to find
// search index keywords to compare against:
&user_keyword[0..self.fuzzy_length]
// Get the byte index of the _n_th character:
let byte_index: Option<usize> = user_keyword
.char_indices()
.take(self.fuzzy_length)
.map(|(idx, _ch)| idx)
.max();
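// Use the first _n_ characters of the user's keyword. These
// first characters are used to find search index keywords to
// fuzzy match against.
// NOTE(review): `byte_index` is the byte where the _n_th character starts,
// so the slice below ends just before it and holds _n_ - 1 characters;
// confirm whether that is intended: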
match byte_index {
Some(byte_index) => &user_keyword[0..byte_index],
None => return vec![],
} // match
} else {
// The user's keyword is too short. Do not perform any fuzzy
// matching:
Expand Down
16 changes: 13 additions & 3 deletions src/simple/internal/strsim/strsim_global_keyword.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,19 @@ impl<K: Hash + Ord> SearchIndex<K> {
// The user keyword must be at least as long as the match length to be
// evaluated for fuzzy-matches (note: `len()` counts bytes, not characters):
if user_keyword.len() >= self.fuzzy_length {
// Use the first _n_ characters of the user's keyword to find
// search index keywords to compare against:
&user_keyword[0..self.fuzzy_length]
// Get the byte index of the _n_th character:
let byte_index: Option<usize> = user_keyword
.char_indices()
.take(self.fuzzy_length)
.map(|(idx, _ch)| idx)
.max();
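// Use the first _n_ characters of the user's keyword. These
// first characters are used to find search index keywords to
// fuzzy match against.
// NOTE(review): `byte_index` is the byte where the _n_th character starts,
// so the slice below ends just before it and holds _n_ - 1 characters;
// confirm whether that is intended: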
match byte_index {
Some(byte_index) => &user_keyword[0..byte_index],
None => return None,
} // match
} else {
// The user's keyword is too short. Do not perform any fuzzy
// matching:
Expand Down
5 changes: 5 additions & 0 deletions src/simple/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,4 +291,9 @@ fn simple() {
search_index.autocomplete_type(&AutocompleteType::Context, "stars are dancers");
#[cfg(any(feature = "eddie", feature = "strsim"))]
assert_eq!(autocomplete_options, vec!["stars are dancing".to_string()]);

// Test UTF-8:
let index = crate::simple::SearchIndex::<usize>::default();
index.search("лол"); // lol in Cyrillic

} // fn

0 comments on commit 4ff7ebb

Please sign in to comment.