Skip to content

Commit

Permalink
fix: words not lining up with sections when using the default (non-stanza) tokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
BrewingWeasel committed Jul 27, 2024
1 parent 1ad937b commit d4a18f3
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src-tauri/src/language_parsing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ pub async fn parse_url(
.peekable();

let mut get_words = |length| {
log::trace!("section length: {length}");
let mut current_length = 0;
let mut words = Vec::new();
while let Some(word) = all_words.peek() {
Expand Down Expand Up @@ -520,7 +521,7 @@ fn default_tokenizer(
rating,
morph: HashMap::new(),
other_forms: get_alternate_forms(&word, interpreter, state)?,
length: word.len() + 1,
length: word.chars().count(),
whitespace_after,
})
}
Expand Down

0 comments on commit d4a18f3

Please sign in to comment.