From df4d2cd8761a3a6b46a35d9d543d6b4d4d4f6b12 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Mon, 20 Nov 2023 00:13:46 +0100 Subject: [PATCH] add `Opts` --- src/algos/fzf/common.rs | 135 ++++++++++----------- src/algos/fzf/opts.rs | 148 +++++++++++++++++++++++ src/algos/fzf/query.rs | 91 +++++---------- src/algos/fzf/v1.rs | 213 ++++++++++++++++----------------- src/algos/fzf/v2.rs | 253 +++++++++++++++++++--------------------- src/lib.rs | 3 +- src/opts.rs | 159 +++++++++++++++++++++++++ src/utils.rs | 132 +-------------------- 8 files changed, 626 insertions(+), 508 deletions(-) create mode 100644 src/algos/fzf/opts.rs create mode 100644 src/opts.rs diff --git a/src/algos/fzf/common.rs b/src/algos/fzf/common.rs index ea9187d..24d9029 100644 --- a/src/algos/fzf/common.rs +++ b/src/algos/fzf/common.rs @@ -8,11 +8,10 @@ use crate::*; pub(super) fn calculate_score( pattern: Pattern, candidate: &str, - range: Range, + candidate_range: Range, + opts: impl Opts, scheme: &Scheme, - char_eq: CharEq, - with_matched_ranges: bool, - matched_ranges: &mut MatchedRanges, + mut ranges_buf: Option<&mut MatchedRanges>, ) -> Score { // TODO: docs let mut is_in_gap = false; @@ -26,9 +25,9 @@ pub(super) fn calculate_score( // TODO: docs let mut consecutive = 0u32; - let range_start = range.start; + let range_start = candidate_range.start; - let mut prev_class = candidate[..range.start] + let mut prev_class = candidate[..candidate_range.start] .chars() .next_back() .map(|ch| char_class(ch, scheme)) @@ -40,10 +39,10 @@ pub(super) fn calculate_score( let mut score: Score = 0; - for (offset, candidate_ch) in candidate[range].char_indices() { + for (offset, candidate_ch) in candidate[candidate_range].char_indices() { let ch_class = char_class(candidate_ch, scheme); - if char_eq(pattern_char, candidate_ch) { + if opts.char_eq(pattern_char, candidate_ch) { score += bonus::MATCH; let mut bonus = bonus(prev_class, ch_class, scheme); @@ -63,10 +62,10 @@ pub(super) fn calculate_score( bonus }; - if with_matched_ranges { + if let Some(ranges) = &mut ranges_buf { let start = range_start + offset; let end = start + candidate_ch.len_utf8(); - matched_ranges.insert(start..end); + ranges.insert(start..end); } is_in_gap = false; @@ -105,10 +104,9 @@ pub(super) fn calculate_score( pub(super) fn exact_match( pattern: Pattern, candidate: &str, + opts: impl Opts, scheme: &Scheme, - char_eq: CharEq, - with_matched_ranges: bool, - matched_ranges: &mut MatchedRanges, + ranges_buf: Option<&mut MatchedRanges>, ) -> Option { if pattern.is_empty() { return Some(0); @@ -144,7 +142,7 @@ pub(super) fn exact_match( let char_class = char_class(candidate_ch, scheme); - if char_eq(pattern_ch, candidate_ch) { + if opts.char_eq(pattern_ch, candidate_ch) { if pattern_char_idx == 0 { bonus_start = current_start_offset + byte_offset; start_offset += byte_offset + candidate_ch.len_utf8(); @@ -194,14 +192,13 @@ pub(super) fn exact_match( pattern, candidate, matched_range.clone(), + opts, scheme, - char_eq, - false, - matched_ranges, + None, ); - if with_matched_ranges { - matched_ranges.insert(matched_range); + if let Some(ranges) = ranges_buf { + ranges.insert(matched_range); } Some(score) @@ -212,10 +209,9 @@ pub(super) fn exact_match( pub(super) fn prefix_match( pattern: Pattern, candidate: &str, + opts: impl Opts, scheme: &Scheme, - char_eq: CharEq, - with_matched_ranges: bool, - matched_ranges: &mut MatchedRanges, + ranges_buf: Option<&mut MatchedRanges>, ) -> Option { if pattern.is_empty() { return Some(0); @@ -231,7 +227,7 @@ pub(super) fn prefix_match( for (candidate_ch, pattern_ch) in candidate[ignored_leading_spaces..].chars().zip(pattern_chars.by_ref()) { - if !char_eq(pattern_ch, candidate_ch) { + if !opts.char_eq(pattern_ch, candidate_ch) { return None; } match_byte_len += candidate_ch.len_utf8(); @@ -248,14 +244,13 @@ pub(super) fn prefix_match( pattern, candidate, matched_range.clone(), + opts, scheme, - char_eq, - false, - matched_ranges, + None, ); - if with_matched_ranges { - matched_ranges.insert(matched_range); + if let Some(ranges) = ranges_buf { + ranges.insert(matched_range); } Some(score) @@ -266,10 +261,9 @@ pub(super) fn prefix_match( pub(super) fn suffix_match( pattern: Pattern, candidate: &str, + opts: impl Opts, scheme: &Scheme, - char_eq: CharEq, - with_matched_ranges: bool, - matched_ranges: &mut MatchedRanges, + ranges_buf: Option<&mut MatchedRanges>, ) -> Option { if pattern.is_empty() { return Some(0); @@ -287,7 +281,7 @@ pub(super) fn suffix_match( .rev() .zip(pattern_chars.by_ref()) { - if !char_eq(pattern_ch, candidate_ch) { + if !opts.char_eq(pattern_ch, candidate_ch) { return None; } match_byte_len += candidate_ch.len_utf8(); @@ -304,14 +298,13 @@ pub(super) fn suffix_match( pattern, candidate, matched_range.clone(), + opts, scheme, - char_eq, - false, - matched_ranges, + None, ); - if with_matched_ranges { - matched_ranges.insert(matched_range); + if let Some(ranges) = ranges_buf { + ranges.insert(matched_range); } Some(score) @@ -322,10 +315,9 @@ pub(super) fn suffix_match( pub(super) fn equal_match( pattern: Pattern, candidate: &str, + opts: impl Opts, scheme: &Scheme, - char_eq: CharEq, - with_matched_ranges: bool, - matched_ranges: &mut MatchedRanges, + ranges_buf: Option<&mut MatchedRanges>, ) -> Option { if pattern.is_empty() { return Some(0); @@ -358,7 +350,7 @@ pub(super) fn equal_match( for (pattern_ch, candidate_ch) in pattern_chars.by_ref().zip(candidate_chars.by_ref()) { - if !char_eq(pattern_ch, candidate_ch) { + if !opts.char_eq(pattern_ch, candidate_ch) { return None; } } @@ -371,14 +363,13 @@ pub(super) fn equal_match( pattern, candidate, matched_range.clone(), + opts, scheme, - char_eq, - false, - matched_ranges, + None, ); - if with_matched_ranges { - matched_ranges.insert(matched_range); + if let Some(ranges) = ranges_buf { + ranges.insert(matched_range); } Some(score) @@ -425,80 +416,75 @@ mod tests { let pattern = Pattern::parse("^AbC$".chars().collect::>().leak()); - let mut matched_ranges = MatchedRanges::default(); + let mut ranges_buf = MatchedRanges::default(); assert!(exact_match( pattern, "ABC", + AsciiCandidateOpts::new(true), &Scheme::default(), - utils::char_eq(true, false), - true, - &mut matched_ranges + Some(&mut ranges_buf) ) .is_none()); { - matched_ranges = MatchedRanges::default(); + ranges_buf = MatchedRanges::default(); assert!(exact_match( pattern, "AbC", + AsciiCandidateOpts::new(true), &Scheme::default(), - utils::char_eq(true, false), - true, - &mut matched_ranges + Some(&mut ranges_buf) ) .is_some()); - assert_eq!(matched_ranges.as_slice(), [0..3]); + assert_eq!(ranges_buf.as_slice(), [0..3]); } { - matched_ranges = MatchedRanges::default(); + ranges_buf = MatchedRanges::default(); assert!(exact_match( pattern, "AbC ", + AsciiCandidateOpts::new(true), &Scheme::default(), - utils::char_eq(true, false), - true, - &mut matched_ranges + Some(&mut ranges_buf) ) .is_some()); - assert_eq!(matched_ranges.as_slice(), [0..3]); + assert_eq!(ranges_buf.as_slice(), [0..3]); } { - matched_ranges = MatchedRanges::default(); + ranges_buf = MatchedRanges::default(); assert!(exact_match( pattern, " AbC ", + AsciiCandidateOpts::new(true), &Scheme::default(), - utils::char_eq(true, false), - true, - &mut matched_ranges + Some(&mut ranges_buf) ) .is_some()); - assert_eq!(matched_ranges.as_slice(), [1..4]); + assert_eq!(ranges_buf.as_slice(), [1..4]); } { - matched_ranges = MatchedRanges::default(); + ranges_buf = MatchedRanges::default(); assert!(exact_match( pattern, " AbC", + AsciiCandidateOpts::new(true), &Scheme::default(), - utils::char_eq(true, false), - true, - &mut matched_ranges + Some(&mut ranges_buf) ) .is_some()); - assert_eq!(matched_ranges.as_slice(), [2..5]); + assert_eq!(ranges_buf.as_slice(), [2..5]); } } @@ -506,18 +492,17 @@ mod tests { fn exact_match_1() { let pattern = Pattern::parse("abc".chars().collect::>().leak()); - let mut matched_ranges = MatchedRanges::default(); + let mut ranges_buf = MatchedRanges::default(); assert!(exact_match( pattern, "aabbcc abc", + AsciiCandidateOpts::new(true), &Scheme::default(), - utils::char_eq(true, false), - true, - &mut matched_ranges + Some(&mut ranges_buf) ) .is_some()); - assert_eq!(matched_ranges.as_slice(), [7..10]); + assert_eq!(ranges_buf.as_slice(), [7..10]); } } diff --git a/src/algos/fzf/opts.rs b/src/algos/fzf/opts.rs new file mode 100644 index 0000000..8a93eb2 --- /dev/null +++ b/src/algos/fzf/opts.rs @@ -0,0 +1,148 @@ +use crate::utils::*; + +/// TODO: docs +pub(crate) trait Opts: Copy { + /// TODO: docs + fn char_eq(&self, query_ch: char, candidate_ch: char) -> bool; + + /// TODO: docs + fn find_first( + &self, + query_ch: char, + candidate: &str, + ) -> Option<(usize, usize)>; + + /// TODO: docs + fn find_last( + &self, + query_ch: char, + candidate: &str, + ) -> Option<(usize, usize)>; + + /// TODO: docs + fn to_char_offset(&self, candidate: &str, byte_offset: usize) -> usize; +} + +#[derive(Clone, Copy)] +pub(crate) struct AsciiCandidateOpts { + is_case_sensitive: bool, + char_eq: CharEq, +} + +impl AsciiCandidateOpts { + #[inline(always)] + pub fn new(is_case_sensitive: bool, normalize_candidate: bool) -> Self { + Self { is_case_sensitive, char_eq: char_eq(is_case_sensitive, false) } + } +} + +impl Opts for AsciiCandidateOpts { + #[inline(always)] + fn char_eq(&self, query_ch: char, candidate_ch: char) -> bool { + self.char_eq(query_ch, candidate_ch) + } + + #[inline(always)] + fn to_char_offset(&self, _: &str, byte_offset: usize) -> usize { + byte_offset + } + + #[inline(always)] + fn find_first( + &self, + query_ch: char, + candidate: &str, + ) -> Option<(usize, usize)> { + if !query_ch.is_ascii() { + return None; + }; + + let query_byte = query_ch as u8; + + let offset = + if self.is_case_sensitive || !query_byte.is_ascii_alphabetic() { + memchr::memchr(query_byte, candidate.as_bytes()) + } else { + memchr::memchr2( + query_byte, + ascii_letter_flip_case(query_byte), + candidate.as_bytes(), + ) + }?; + + Some((offset, 1)) + } + + #[inline(always)] + fn find_last( + &self, + query_ch: char, + candidate: &str, + ) -> Option<(usize, usize)> { + if !query_ch.is_ascii() { + return None; + }; + + let query_byte = query_ch as u8; + + let offset = if self.is_case_sensitive + || !query_byte.is_ascii_alphabetic() + { + memchr::memchr_iter(query_byte, candidate.as_bytes()).next_back() + } else { + memchr::memchr2_iter( + query_byte, + ascii_letter_flip_case(query_byte), + candidate.as_bytes(), + ) + .next_back() + }?; + + Some((offset, 1)) + } +} + +#[derive(Clone, Copy)] +pub(crate) struct UnicodeCandidateOpts(CharEq); + +impl UnicodeCandidateOpts { + #[inline(always)] + pub fn new(is_case_sensitive: bool, normalize_candidate: bool) -> Self { + Self(char_eq(is_case_sensitive, normalize_candidate)) + } +} + +impl Opts for UnicodeCandidateOpts { + #[inline(always)] + fn char_eq(&self, query_ch: char, candidate_ch: char) -> bool { + self.0(query_ch, candidate_ch) + } + + #[inline(always)] + fn to_char_offset(&self, candidate: &str, byte_offset: usize) -> usize { + char_len(&candidate[..byte_offset]) + } + + #[inline(always)] + fn find_first( + &self, + query_ch: char, + candidate: &str, + ) -> Option<(usize, usize)> { + candidate.char_indices().find_map(|(offset, ch)| { + self.0(query_ch, ch).then_some((offset, ch.len_utf8())) + }) + } + + #[inline(always)] + fn find_last( + &self, + query_ch: char, + candidate: &str, + ) -> Option<(usize, usize)> { + candidate.char_indices().find_map(|(offset, candidate_ch)| { + self.char_eq(query_ch, candidate_ch) + .then_some((offset, candidate_ch.len_utf8())) + }) + } +} diff --git a/src/algos/fzf/query.rs b/src/algos/fzf/query.rs index 05e9cd2..4f62ab3 100644 --- a/src/algos/fzf/query.rs +++ b/src/algos/fzf/query.rs @@ -4,15 +4,13 @@ use super::*; use crate::*; /// TODO: docs -type FuzzyAlgo = fn( +type FuzzyAlgo = fn( Pattern, &str, + O, &Scheme, - CharEq, - bool, - bool, + Option<&mut MatchedRanges>, T, - &mut MatchedRanges, ) -> Option; /// A parsed fzf query. @@ -302,73 +300,44 @@ impl<'a> Pattern<'a> { /// TODO: docs #[inline] - pub(super) fn score( + pub(super) fn score( self, candidate: &str, + opts: O, scheme: &Scheme, - char_eq: CharEq, - is_case_sensitive: bool, - mut with_matched_ranges: bool, - extras: Extras, - matched_ranges: &mut MatchedRanges, - fuzzy_algo: FuzzyAlgo, + mut ranges_buf: Option<&mut MatchedRanges>, + extra: E, + fuzzy_algo: FuzzyAlgo, ) -> Option { - with_matched_ranges &= !self.is_inverse; + if self.is_inverse { + ranges_buf = None; + } let result = match self.match_type { - MatchType::Fuzzy => fuzzy_algo( - self, - candidate, - scheme, - char_eq, - is_case_sensitive, - with_matched_ranges, - extras, - matched_ranges, - ), - - MatchType::Exact => exact_match( - self, - candidate, - scheme, - char_eq, - with_matched_ranges, - matched_ranges, - ), - - MatchType::PrefixExact => prefix_match( - self, - candidate, - scheme, - char_eq, - with_matched_ranges, - matched_ranges, - ), - - MatchType::SuffixExact => suffix_match( - self, - candidate, - scheme, - char_eq, - with_matched_ranges, - matched_ranges, - ), - - MatchType::EqualExact => equal_match( - self, - candidate, - scheme, - char_eq, - with_matched_ranges, - matched_ranges, - ), + MatchType::Fuzzy => { + fuzzy_algo(self, candidate, opts, scheme, ranges_buf, extra) + }, + + MatchType::Exact => { + exact_match(self, candidate, opts, scheme, ranges_buf) + }, + + MatchType::PrefixExact => { + prefix_match(self, candidate, opts, scheme, ranges_buf) + }, + + MatchType::SuffixExact => { + suffix_match(self, candidate, opts, scheme, ranges_buf) + }, + + MatchType::EqualExact => { + equal_match(self, candidate, opts, scheme, ranges_buf) + }, }; match (result.is_some(), self.is_inverse) { (true, false) => result, - (false, true) => Some(0), - _ => None, } } diff --git a/src/algos/fzf/v1.rs b/src/algos/fzf/v1.rs index 20e0220..019b24a 100644 --- a/src/algos/fzf/v1.rs +++ b/src/algos/fzf/v1.rs @@ -34,7 +34,7 @@ impl core::fmt::Debug for FzfV1 { impl FzfV1 { /// TODO: docs - #[inline] + #[inline(always)] pub fn new() -> Self { Self::default() } @@ -46,7 +46,43 @@ impl FzfV1 { } /// TODO: docs - #[inline] + #[inline(always)] + fn score( + &mut self, + pattern: Pattern, + candidate: &str, + is_candidate_ascii: bool, + buf: Option<&mut MatchedRanges>, + ) -> Option { + let is_sensitive = match self.case_sensitivity { + CaseSensitivity::Sensitive => true, + CaseSensitivity::Insensitive => false, + CaseSensitivity::Smart => pattern.has_uppercase, + }; + + if is_candidate_ascii { + fzf_v1( + pattern, + candidate, + AsciiCandidateOpts::new(is_sensitive), + &self.scheme, + buf, + (), + ) + } else { + fzf_v1( + pattern, + candidate, + UnicodeCandidateOpts::new(is_sensitive, self.normalization), + &self.scheme, + buf, + (), + ) + } + } + + /// TODO: docs + #[inline(always)] pub fn with_case_sensitivity( &mut self, case_sensitivity: CaseSensitivity, @@ -56,21 +92,21 @@ impl FzfV1 { } /// TODO: docs - #[inline] + #[inline(always)] pub fn with_matched_ranges(&mut self, matched_ranges: bool) -> &mut Self { self.with_matched_ranges = matched_ranges; self } /// TODO: docs - #[inline] + #[inline(always)] pub fn with_normalization(&mut self, normalization: bool) -> &mut Self { self.normalization = normalization; self } /// TODO: docs - #[inline] + #[inline(always)] pub fn with_scoring_scheme(&mut self, scheme: FzfScheme) -> &mut Self { self.scheme = scheme.into_inner(); self @@ -82,7 +118,7 @@ impl Metric for FzfV1 { type Distance = FzfDistance; - #[inline] + #[inline(always)] fn distance( &mut self, query: FzfQuery<'_>, @@ -94,61 +130,62 @@ impl Metric for FzfV1 { let is_candidate_ascii = candidate.is_ascii(); - let mut matched_ranges = MatchedRanges::default(); + let mut buf = if self.with_matched_ranges { + Some(MatchedRanges::default()) + } else { + None + }; let conditions = match query.search_mode { - SearchMode::NotExtended(pattern) => { - let is_case_sensitive = match self.case_sensitivity { - CaseSensitivity::Sensitive => true, - CaseSensitivity::Insensitive => false, - CaseSensitivity::Smart => pattern.has_uppercase, - }; - - let char_eq = - utils::char_eq(is_case_sensitive, self.normalization); - - let score = fzf_v1( - pattern, - candidate, - &self.scheme, - char_eq, - is_case_sensitive, - self.with_matched_ranges, - is_candidate_ascii, - &mut matched_ranges, - )?; - - let distance = FzfDistance::from_score(score); + SearchMode::Extended(conditions) => conditions, - return Some(Match::new(distance, matched_ranges)); + SearchMode::NotExtended(pattern) => { + return self + .score( + pattern, + candidate, + is_candidate_ascii, + buf.as_mut(), + ) + .map(FzfDistance::from_score) + .map(|distance| { + Match::new(distance, buf.unwrap_or_default()) + }) }, - - SearchMode::Extended(conditions) => conditions, }; let mut total_score = 0; for condition in conditions { let score = condition.iter().find_map(|pattern| { - let is_case_sensitive = match self.case_sensitivity { + let is_sensitive = match self.case_sensitivity { CaseSensitivity::Sensitive => true, CaseSensitivity::Insensitive => false, CaseSensitivity::Smart => pattern.has_uppercase, }; - let char_eq = - utils::char_eq(is_case_sensitive, self.normalization); - - pattern.score( - candidate, - &self.scheme, - char_eq, - is_case_sensitive, - self.with_matched_ranges, - is_candidate_ascii, - &mut matched_ranges, - fzf_v1, - ) + if is_candidate_ascii { + pattern.score( + candidate, + AsciiCandidateOpts::new(is_sensitive), + &self.scheme, + buf.as_mut(), + (), + fzf_v1, + ) + } else { + pattern.score( + candidate, + UnicodeCandidateOpts::new( + is_sensitive, + self.normalization, + ), + &self.scheme, + buf.as_mut(), + (), + fzf_v1, + ) + } })?; total_score += score; @@ -156,7 +193,7 @@ impl Metric for FzfV1 { let distance = FzfDistance::from_score(total_score); - Some(Match::new(distance, matched_ranges)) + Some(Match::new(distance, buf.unwrap_or_default())) } #[inline] @@ -175,44 +212,24 @@ impl Metric for FzfV1 { pub(super) fn fzf_v1( pattern: Pattern, candidate: &str, + opts: impl Opts, scheme: &Scheme, - char_eq: CharEq, - is_case_sensitive: bool, - with_matched_ranges: bool, - is_candidate_ascii: bool, - matched_ranges: &mut MatchedRanges, + ranges_buf: Option<&mut MatchedRanges>, + _: (), ) -> Option { if pattern.is_empty() { return Some(0); } - let range_forward = forward_pass( - pattern, - candidate, - is_candidate_ascii, - is_case_sensitive, - char_eq, - )?; - - let start_backward = backward_pass( - pattern, - &candidate[range_forward.clone()], - is_candidate_ascii, - is_case_sensitive, - char_eq, - ); + let range_forward = forward_pass(pattern, candidate, opts)?; + + let start_backward = + backward_pass(pattern, &candidate[range_forward.clone()], opts); let range = range_forward.start + start_backward..range_forward.end; - let score = calculate_score( - pattern, - candidate, - range, - scheme, - char_eq, - with_matched_ranges, - matched_ranges, - ); + let score = + calculate_score(pattern, candidate, range, opts, scheme, ranges_buf); Some(score) } @@ -222,23 +239,14 @@ pub(super) fn fzf_v1( fn forward_pass( pattern: Pattern, mut candidate: &str, - is_candidate_ascii: bool, - is_case_sensitive: bool, - char_eq: CharEq, + opts: impl Opts, ) -> Option> { let mut pattern_chars = pattern.chars(); let mut pattern_char = pattern_chars.next()?; - let (start_offset, matched_char) = utils::find_first( - pattern_char, - candidate, - is_candidate_ascii, - is_case_sensitive, - char_eq, - )?; - - let matched_char_byte_len = matched_char.len_utf8(); + let (start_offset, matched_char_byte_len) = + opts.find_first(pattern_char, candidate)?; let mut end_offset = start_offset + matched_char_byte_len; @@ -252,15 +260,8 @@ fn forward_pass( candidate = unsafe { candidate.get_unchecked(end_offset..) }; loop { - let (byte_offset, matched_char) = utils::find_first( - pattern_char, - candidate, - is_candidate_ascii, - is_case_sensitive, - char_eq, - )?; - - let matched_char_byte_len = matched_char.len_utf8(); + let (byte_offset, matched_char_byte_len) = + opts.find_first(pattern_char, candidate)?; end_offset += byte_offset + matched_char_byte_len; @@ -282,18 +283,16 @@ fn forward_pass( fn backward_pass( pattern: Pattern, mut candidate: &str, - is_candidate_ascii: bool, - is_case_sensitive: bool, - char_eq: CharEq, + opts: impl Opts, ) -> usize { // The candidate must start with the first character of the query. - debug_assert!(char_eq( + debug_assert!(opts.char_eq( pattern.chars().next().unwrap(), candidate.chars().next().unwrap(), )); // The candidate must end with the last character of the query. - debug_assert!(char_eq( + debug_assert!(opts.char_eq( pattern.chars().next_back().unwrap(), candidate.chars().next_back().unwrap(), )); @@ -303,14 +302,8 @@ fn backward_pass( let mut pattern_char = pattern_chars.next().expect("pattern is not empty"); loop { - let (byte_offset, _) = utils::find_last( - pattern_char, - candidate, - is_candidate_ascii, - is_case_sensitive, - char_eq, - ) - .unwrap(); + let (byte_offset, _) = + opts.find_last(pattern_char, candidate).unwrap(); if let Some(next) = pattern_chars.next() { pattern_char = next; diff --git a/src/algos/fzf/v2.rs b/src/algos/fzf/v2.rs index ec681f6..20a4c89 100644 --- a/src/algos/fzf/v2.rs +++ b/src/algos/fzf/v2.rs @@ -1,6 +1,7 @@ use core::ops::Range; use super::{query::*, scoring::*, slab::*, *}; +use crate::Opts; use crate::*; /// TODO: docs @@ -37,7 +38,7 @@ impl core::fmt::Debug for FzfV2 { impl FzfV2 { /// TODO: docs - #[inline] + #[inline(always)] pub fn new() -> Self { Self::default() } @@ -49,7 +50,43 @@ impl FzfV2 { } /// TODO: docs - #[inline] + #[inline(always)] + fn score( + &mut self, + pattern: Pattern, + candidate: &str, + is_candidate_ascii: bool, + buf: Option<&mut MatchedRanges>, + ) -> Option { + let is_sensitive = match self.case_sensitivity { + CaseSensitivity::Sensitive => true, + CaseSensitivity::Insensitive => false, + CaseSensitivity::Smart => pattern.has_uppercase, + }; + + if is_candidate_ascii { + fzf_v2( + pattern, + candidate, + AsciiCandidateOpts::new(is_sensitive), + &self.scheme, + buf, + &mut self.slab, + ) + } else { + fzf_v2( + pattern, + candidate, + UnicodeCandidateOpts::new(is_sensitive, self.normalization), + &self.scheme, + buf, + &mut self.slab, + ) + } + } + + /// TODO: docs + #[inline(always)] pub fn with_case_sensitivity( &mut self, case_sensitivity: CaseSensitivity, @@ -59,21 +96,21 @@ impl FzfV2 { } /// TODO: docs - #[inline] + #[inline(always)] pub fn with_matched_ranges(&mut self, matched_ranges: bool) -> &mut Self { self.with_matched_ranges = matched_ranges; self } /// TODO: docs - #[inline] + #[inline(always)] pub fn with_normalization(&mut self, normalization: bool) -> &mut Self { self.normalization = normalization; self } /// TODO: docs - #[inline] + #[inline(always)] pub fn with_scoring_scheme(&mut self, scheme: FzfScheme) -> &mut Self { self.scheme = scheme.into_inner(); self @@ -85,7 +122,7 @@ impl Metric for FzfV2 { type Distance = FzfDistance; - #[inline] + #[inline(always)] fn distance( &mut self, query: FzfQuery<'_>, @@ -97,61 +134,62 @@ impl Metric for FzfV2 { let is_candidate_ascii = candidate.is_ascii(); - let mut matched_ranges = MatchedRanges::default(); + let mut buf = if self.with_matched_ranges { + Some(MatchedRanges::default()) + } else { + None + }; let conditions = match query.search_mode { - SearchMode::NotExtended(pattern) => { - let is_case_sensitive = match self.case_sensitivity { - CaseSensitivity::Sensitive => true, - CaseSensitivity::Insensitive => false, - CaseSensitivity::Smart => pattern.has_uppercase, - }; - - let char_eq = - utils::char_eq(is_case_sensitive, self.normalization); - - let score = fzf_v2( - pattern, - candidate, - &self.scheme, - char_eq, - is_case_sensitive, - self.with_matched_ranges, - (&mut self.slab, is_candidate_ascii), - &mut matched_ranges, - )?; - - let distance = FzfDistance::from_score(score); + SearchMode::Extended(conditions) => conditions, - return Some(Match::new(distance, matched_ranges)); + SearchMode::NotExtended(pattern) => { + return self + .score( + pattern, + candidate, + is_candidate_ascii, + buf.as_mut(), + ) + .map(FzfDistance::from_score) + .map(|distance| { + Match::new(distance, buf.unwrap_or_default()) + }) }, - - SearchMode::Extended(conditions) => conditions, }; let mut total_score = 0; for condition in conditions { let score = condition.iter().find_map(|pattern| { - let is_case_sensitive = match self.case_sensitivity { + let is_sensitive = match self.case_sensitivity { CaseSensitivity::Sensitive => true, CaseSensitivity::Insensitive => false, CaseSensitivity::Smart => pattern.has_uppercase, }; - let char_eq = - utils::char_eq(is_case_sensitive, self.normalization); - - pattern.score( - candidate, - &self.scheme, - char_eq, - is_case_sensitive, - self.with_matched_ranges, - (&mut self.slab, is_candidate_ascii), - &mut matched_ranges, - fzf_v2, - ) + if is_candidate_ascii { + pattern.score( + candidate, + AsciiCandidateOpts::new(is_sensitive), + &self.scheme, + buf.as_mut(), + &mut self.slab, + fzf_v2, + ) + } else { + pattern.score( + candidate, + UnicodeCandidateOpts::new( + is_sensitive, + self.normalization, + ), + &self.scheme, + buf.as_mut(), + &mut self.slab, + fzf_v2, + ) + } })?; total_score += score; @@ -159,7 +197,7 @@ impl Metric for FzfV2 { let distance = FzfDistance::from_score(total_score); - Some(Match::new(distance, matched_ranges)) + Some(Match::new(distance, buf.unwrap_or_default())) } #[inline] @@ -169,7 +207,7 @@ impl Metric for FzfV2 { _candidate: &str, _ranges_buf: &mut Vec>, ) -> Option { - todo!() + todo!(); } } @@ -178,25 +216,17 @@ impl Metric for FzfV2 { pub(super) fn fzf_v2( pattern: Pattern, candidate: &str, + opts: impl Opts, scheme: &Scheme, - char_eq: CharEq, - is_case_sensitive: bool, - with_matched_ranges: bool, - (slab, is_candidate_ascii): (&mut V2Slab, bool), - ranges: &mut MatchedRanges, + ranges_buf: Option<&mut MatchedRanges>, + slab: &mut V2Slab, ) -> Option { if pattern.is_empty() { return Some(0); } - let (matches, last_match_offset) = matches( - &mut slab.matched_indices, - pattern, - candidate, - is_case_sensitive, - is_candidate_ascii, - char_eq, - )?; + let (matches, last_match_offset) = + matches(&mut slab.matched_indices, pattern, candidate, opts)?; let first_match = matches[0]; @@ -225,21 +255,19 @@ pub(super) fn fzf_v2( &mut slab.consecutive_matrix, pattern, candidate, - is_case_sensitive, - is_candidate_ascii, - char_eq, matches, bonus_vector, + opts, ); - if with_matched_ranges { + if let Some(buf) = ranges_buf { matched_ranges( scores, consecutive, score_cell, candidate, first_match.byte_offset, - ranges, + buf, ); }; @@ -252,9 +280,7 @@ fn matches<'idx>( indices_slab: &'idx mut MatchedIndicesSlab, pattern: Pattern, mut candidate: &str, - is_case_sensitive: bool, - is_candidate_ascii: bool, - char_eq: CharEq, + opts: impl Opts, ) -> Option<(&'idx mut [MatchedIdx], usize)> { let matched_idxs = indices_slab.alloc(pattern.char_len()); @@ -265,26 +291,15 @@ fn matches<'idx>( loop { let query_char = pattern.char(query_char_idx); - let (byte_offset, matched_char) = utils::find_first( - query_char, - candidate, - is_candidate_ascii, - is_case_sensitive, - char_eq, - )?; + let (byte_offset, matched_char_byte_len) = + opts.find_first(query_char, candidate)?; - let char_offset = if is_candidate_ascii { - byte_offset - } else { - utils::char_len(&candidate[..byte_offset]) - }; + let char_offset = opts.to_char_offset(candidate, byte_offset); last_matched_idx += MatchedIdx { byte_offset, char_offset }; matched_idxs[query_char_idx] = last_matched_idx; - let matched_char_byte_len = matched_char.len_utf8(); - // SAFETY: the start of the range is within the byte length of the // candidate and it's a valid char boundary. candidate = unsafe { @@ -302,14 +317,10 @@ fn matches<'idx>( } let last_char_offset_inclusive = last_matched_idx.byte_offset - + if let Some((byte_offset, matched_char)) = utils::find_last( - pattern.char(query_char_idx), - candidate, - is_candidate_ascii, - is_case_sensitive, - char_eq, - ) { - byte_offset + matched_char.len_utf8() + + if let Some((byte_offset, matched_char_byte_len)) = + opts.find_last(pattern.char(query_char_idx), candidate) + { + byte_offset + matched_char_byte_len } else { 0 }; @@ -345,11 +356,9 @@ fn score<'scoring, 'consecutive>( consecutive_slab: &'consecutive mut MatrixSlab, pattern: Pattern, candidate: &str, - is_case_sensitive: bool, - is_candidate_ascii: bool, - char_eq: CharEq, matches: &[MatchedIdx], bonus_vector: &[Score], + opts: impl Opts, ) -> (Matrix<'scoring, Score>, Matrix<'consecutive, usize>, Score, MatrixCell) { // The length of the bonus slice is the same as the character length of the @@ -369,9 +378,7 @@ fn score<'scoring, 'consecutive>( bonus_vector, pattern.char(0), candidate, - is_case_sensitive, - is_candidate_ascii, - char_eq, + opts, ); let (max_score, max_score_cell) = score_remaining_rows( @@ -381,9 +388,7 @@ fn score<'scoring, 'consecutive>( matches, candidate, bonus_vector, - is_case_sensitive, - is_candidate_ascii, - char_eq, + opts, max_score, max_score_cell, ); @@ -399,9 +404,7 @@ fn score_first_row( bonus_vector: &[Score], query_first_char: char, mut candidate: &str, - is_case_sensitive: bool, - is_candidate_ascii: bool, - char_eq: CharEq, + opts: impl Opts, ) -> (Score, MatrixCell) { let mut max_score: Score = 0; @@ -416,13 +419,9 @@ fn score_first_row( let mut penalty = penalty::GAP_START; while !candidate.is_empty() { - let Some((byte_idx, matched_char)) = utils::find_first( - query_first_char, - candidate, - is_candidate_ascii, - is_case_sensitive, - char_eq, - ) else { + let Some((byte_offset, matched_char_byte_len)) = + opts.find_first(query_first_char, candidate) + else { for col in col..scores_first_row.len() { let score = prev_score.saturating_sub(penalty); penalty = penalty::GAP_EXTENSION; @@ -433,15 +432,11 @@ fn score_first_row( break; }; - let char_idx = if is_candidate_ascii { - byte_idx - } else { - utils::char_len(&candidate[..byte_idx]) - }; + let char_offset = opts.to_char_offset(candidate, byte_offset); // TODO: explain what this does. { - for col in col..col + char_idx { + for col in col..col + char_offset { let score = prev_score.saturating_sub(penalty); penalty = penalty::GAP_EXTENSION; scores_first_row[col] = score; @@ -449,7 +444,7 @@ fn score_first_row( } } - col += char_idx; + col += char_offset; consecutives_first_row[col] = 1; @@ -467,7 +462,7 @@ fn score_first_row( col += 1; - candidate = &candidate[byte_idx + matched_char.len_utf8()..]; + candidate = &candidate[byte_offset + matched_char_byte_len..]; } (max_score, MatrixCell(max_score_col)) @@ -482,9 +477,7 @@ fn score_remaining_rows( matches: &[MatchedIdx], candidate: &str, bonus_vector: &[Score], - is_case_sensitive: bool, - is_candidate_ascii: bool, - char_eq: CharEq, + opts: impl Opts, mut max_score: Score, mut max_score_cell: MatrixCell, ) -> (Score, MatrixCell) { @@ -509,13 +502,9 @@ fn score_remaining_rows( let mut penalty = penalty::GAP_START; while !candidate.is_empty() { - let Some((byte_offset, matched_char)) = utils::find_first( - query_char, - candidate, - is_candidate_ascii, - is_case_sensitive, - char_eq, - ) else { + let Some((byte_offset, matched_char_byte_len)) = + opts.find_first(query_char, candidate) + else { for col in column..matrix_width { let score_left = scores_row[col - 1]; let score = score_left.saturating_sub(penalty); @@ -526,11 +515,7 @@ fn score_remaining_rows( break; }; - let char_offset = if is_candidate_ascii { - byte_offset - } else { - utils::char_len(&candidate[..byte_offset]) - }; + let char_offset = opts.to_char_offset(candidate, byte_offset); // TODO: explain what this does. penalty = penalty::GAP_START; @@ -590,7 +575,7 @@ fn score_remaining_rows( column += 1; - candidate = &candidate[byte_offset + matched_char.len_utf8()..]; + candidate = &candidate[byte_offset + matched_char_byte_len..]; } } diff --git a/src/lib.rs b/src/lib.rs index 416cf6d..d8f0a84 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -63,6 +63,7 @@ mod r#match; mod matched_ranges; mod metric; mod normalize; +mod opts; mod tiny_vec; mod utils; @@ -70,5 +71,5 @@ pub use algos::*; pub use case_sensitivity::CaseSensitivity; use matched_ranges::MatchedRanges; pub use metric::Metric; +use opts::*; pub use r#match::Match; -use utils::CharEq; diff --git a/src/opts.rs b/src/opts.rs new file mode 100644 index 0000000..ea7dfa6 --- /dev/null +++ b/src/opts.rs @@ -0,0 +1,159 @@ +use crate::utils::*; + +/// TODO: docs +pub(crate) trait Opts: Copy { + /// TODO: docs + fn char_eq(&self, query_ch: char, candidate_ch: char) -> bool; + + /// TODO: docs + fn find_first( + &self, + query_ch: char, + candidate: &str, + ) -> Option<(usize, usize)>; + + /// TODO: docs + fn find_last( + &self, + query_ch: char, + candidate: &str, + ) -> Option<(usize, usize)>; + + /// TODO: docs + fn to_char_offset(&self, candidate: &str, byte_offset: usize) -> usize; +} + +#[derive(Clone, Copy)] +pub(crate) struct AsciiCandidateOpts { + is_case_sensitive: bool, +} + +impl AsciiCandidateOpts { + #[inline(always)] + pub fn new(is_case_sensitive: bool) -> Self { + Self { is_case_sensitive } + } +} + +impl Opts for AsciiCandidateOpts { + #[inline(always)] + fn char_eq(&self, query_ch: char, candidate_ch: char) -> bool { + if self.is_case_sensitive { + query_ch == candidate_ch + } else { + query_ch.eq_ignore_ascii_case(&candidate_ch) + } + } + + #[inline(always)] + fn to_char_offset(&self, _: &str, byte_offset: usize) -> usize { + byte_offset + } + + #[inline(always)] + fn find_first( + &self, + query_ch: char, + candidate: &str, + ) -> Option<(usize, usize)> { + if !query_ch.is_ascii() { + return None; + }; + + let query_byte = query_ch as u8; + + let offset = + if self.is_case_sensitive || !query_byte.is_ascii_alphabetic() { + memchr::memchr(query_byte, candidate.as_bytes()) + } else { + memchr::memchr2( + query_byte, + ascii_letter_flip_case(query_byte), + candidate.as_bytes(), + ) + }?; + + Some((offset, 1)) + } + + #[inline(always)] + fn find_last( + &self, + query_ch: char, + candidate: &str, + ) -> Option<(usize, usize)> { + if !query_ch.is_ascii() { + return None; + }; + + let query_byte = query_ch as u8; + + let offset = if self.is_case_sensitive + || !query_byte.is_ascii_alphabetic() + { + memchr::memchr_iter(query_byte, candidate.as_bytes()).next_back() + } else { + memchr::memchr2_iter( + query_byte, + ascii_letter_flip_case(query_byte), + candidate.as_bytes(), + ) + .next_back() + }?; + + Some((offset, 1)) + } +} + +#[derive(Clone, Copy)] +pub(crate) struct UnicodeCandidateOpts(CharEq); + +impl UnicodeCandidateOpts { + #[inline(always)] + pub fn new(is_case_sensitive: bool, normalize_candidate: bool) -> Self { + let fun = match (is_case_sensitive, normalize_candidate) { + (false, false) => case_insensitive_eq, + (true, false) => case_sensitive_eq, + (false, true) => case_insensitive_normalized_eq, + (true, true) => case_sensitive_normalized_eq, + }; + + Self(fun) + } +} + +impl Opts for UnicodeCandidateOpts { + #[inline(always)] + fn char_eq(&self, query_ch: char, candidate_ch: char) -> bool { + self.0(query_ch, candidate_ch) + } + + #[inline(always)] + fn to_char_offset(&self, candidate: &str, byte_offset: usize) -> usize { + char_len(&candidate[..byte_offset]) + } + + #[inline(always)] + fn find_first( + &self, + query_ch: char, + candidate: &str, + ) -> Option<(usize, usize)> { + candidate.char_indices().find_map(|(offset, candidate_ch)| { + self.char_eq(query_ch, candidate_ch) + .then_some((offset, candidate_ch.len_utf8())) + }) + } + + #[inline(always)] + fn find_last( + &self, + query_ch: char, + candidate: &str, + ) -> Option<(usize, usize)> { + candidate.char_indices().rev().find_map(|(offset, candidate_ch)| { + self.char_eq(query_ch, candidate_ch) + .then_some((offset, candidate_ch.len_utf8())) + }) + } +} diff --git a/src/utils.rs b/src/utils.rs index 5cd9766..f461921 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -8,159 +8,37 @@ const ASCII_CASE_MASK: u8 = 0b0010_0000; /// TODO: docs #[inline(always)] -fn ascii_letter_flip_case(ascii_letter: u8) -> u8 { +pub fn ascii_letter_flip_case(ascii_letter: u8) -> u8 { debug_assert!(ascii_letter.is_ascii_alphabetic()); ascii_letter ^ ASCII_CASE_MASK } #[inline(always)] -fn case_insensitive_eq(lhs: char, rhs: char) -> bool { +pub fn case_insensitive_eq(lhs: char, rhs: char) -> bool { lhs.eq_ignore_ascii_case(&rhs) } #[inline(always)] -fn case_insensitive_normalized_eq(lhs: char, rhs: char) -> bool { +pub fn case_insensitive_normalized_eq(lhs: char, rhs: char) -> bool { lhs.eq_ignore_ascii_case(&normalize_candidate_char(lhs, rhs)) } #[inline(always)] -fn case_sensitive_eq(lhs: char, rhs: char) -> bool { +pub fn case_sensitive_eq(lhs: char, rhs: char) -> bool { lhs == rhs } #[inline(always)] -fn case_sensitive_normalized_eq(lhs: char, rhs: char) -> bool { +pub fn case_sensitive_normalized_eq(lhs: char, rhs: char) -> bool { lhs == normalize_candidate_char(lhs, rhs) } -/// TODO: docs -#[inline(always)] -pub fn char_eq( - is_case_sensitive: bool, - normalize_candidate: bool, -) -> fn(char, char) -> bool { - match (is_case_sensitive, normalize_candidate) { - (false, false) => case_insensitive_eq, - (true, false) => case_sensitive_eq, - (false, true) => case_insensitive_normalized_eq, - (true, true) => case_sensitive_normalized_eq, - } -} - /// TODO: docs #[inline(always)] pub fn char_len(s: &str) -> usize { s.chars().count() } -/// TODO: docs -#[inline(always)] -pub fn find_first( - needle: char, - haystack: &str, - is_candidate_ascii: bool, - is_case_sensitive: bool, - char_eq: CharEq, -) -> Option<(usize, char)> { - if is_candidate_ascii { - if needle.is_ascii() { - find_first_ascii(needle as u8, haystack, is_case_sensitive) - } else { - None - } - } else { - find_first_unicode(needle, haystack, char_eq) - } -} - -/// TODO: docs -#[inline(always)] -fn find_first_ascii( - needle: u8, - haystack: &str, - is_case_sensitive: bool, -) -> Option<(usize, char)> { - debug_assert!(needle.is_ascii()); - debug_assert!(haystack.is_ascii()); - - let haystack = haystack.as_bytes(); - - let idx = if is_case_sensitive || !needle.is_ascii_alphabetic() { - memchr::memchr(needle, haystack) - } else { - memchr::memchr2(needle, ascii_letter_flip_case(needle), haystack) - }?; - - Some((idx, haystack[idx] as char)) -} - -/// TODO: docs -#[inline(always)] -fn find_first_unicode( - needle: char, - haystack: &str, - char_eq: CharEq, -) -> Option<(usize, char)> { - haystack - .char_indices() - .find_map(|(offset, ch)| char_eq(needle, ch).then_some((offset, ch))) -} - -/// TODO: docs -#[inline(always)] -pub fn find_last( - needle: char, - haystack: &str, - is_candidate_ascii: bool, - is_case_sensitive: bool, - char_eq: CharEq, -) -> Option<(usize, char)> { - if is_candidate_ascii { - if needle.is_ascii() { - find_last_ascii(needle as u8, haystack, is_case_sensitive) - } else { - None - } - } else { - find_last_unicode(needle, haystack, char_eq) - } -} - -/// TODO: docs -#[inline(always)] -fn find_last_ascii( - needle: u8, - haystack: &str, - is_case_sensitive: bool, -) -> Option<(usize, char)> { - debug_assert!(needle.is_ascii()); - debug_assert!(haystack.is_ascii()); - - let haystack = haystack.as_bytes(); - - let idx = if is_case_sensitive || !needle.is_ascii_alphabetic() { - memchr::memchr_iter(needle, haystack).next_back() - } else { - memchr::memchr2_iter(needle, ascii_letter_flip_case(needle), haystack) - .next_back() - }?; - - Some((idx, haystack[idx] as char)) -} - -/// TODO: docs -#[inline(always)] -fn find_last_unicode( - needle: char, - haystack: &str, - char_eq: CharEq, -) -> Option<(usize, char)> { - haystack - .char_indices() - .rev() - .find_map(|(offset, ch)| char_eq(needle, ch).then_some((offset, ch))) -} - /// TODO: docs #[inline(always)] pub fn leading_spaces(s: &str) -> usize {