From d4a18f3fe582c6cd0931c8cb843f2c5d30197a99 Mon Sep 17 00:00:00 2001 From: Finn Brewer Date: Sat, 27 Jul 2024 15:10:19 -0700 Subject: [PATCH] fix: words not lining up with sections with default non stanza tokenizer --- src-tauri/src/language_parsing.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src-tauri/src/language_parsing.rs b/src-tauri/src/language_parsing.rs index 2c593b7..dad7d4d 100644 --- a/src-tauri/src/language_parsing.rs +++ b/src-tauri/src/language_parsing.rs @@ -224,6 +224,7 @@ pub async fn parse_url( .peekable(); let mut get_words = |length| { + log::trace!("section length: {length}"); let mut current_length = 0; let mut words = Vec::new(); while let Some(word) = all_words.peek() { @@ -520,7 +521,7 @@ fn default_tokenizer( rating, morph: HashMap::new(), other_forms: get_alternate_forms(&word, interpreter, state)?, - length: word.len() + 1, + length: word.chars().count(), whitespace_after, }) }