Skip to content

Commit

Permalink
fix: ending word getting removed in default parser if it doesn't end with punctuation
Browse files Browse the repository at this point in the history
  • Loading branch information
BrewingWeasel committed Aug 18, 2024
1 parent bfefc7b commit f62d9f2
Showing 1 changed file with 32 additions and 0 deletions.
32 changes: 32 additions & 0 deletions src-tauri/src/language_parsing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,9 @@ fn default_tokenizer(
) -> Result<(Vec<String>, Vec<Word>), KalbaError> {
let mut words = Vec::new();
let mut sentences = Vec::new();
if sent.is_empty() {
return Ok((sentences, words));
}
let mut current_sentence = String::new();

let mut currently_building = String::new();
Expand Down Expand Up @@ -665,6 +668,35 @@ fn default_tokenizer(
}
}
}

if !currently_building.is_empty() {
let word = std::mem::take(&mut currently_building);
let rating = state
.to_save
.language_specific
.get_mut(&language)
.expect("language to be chosen")
.words
.entry(word.clone())
.or_insert(crate::WordInfo {
rating: 0,
method: crate::Method::FromSeen,
history: vec![(chrono::Utc::now(), crate::Method::FromSeen, 0)],
})
.rating;

words.push(Word {
text: word.clone(),
clickable: true,
lemma: word.clone(),
rating,
morph: HashMap::new(),
other_forms: get_alternate_forms(&word, interpreter, state)?,
length: word.chars().count(),
whitespace_after: true,
sentence_index: sentences.len(),
})
}
if !current_sentence.is_empty() {
sentences.push(current_sentence);
}
Expand Down

0 comments on commit f62d9f2

Please sign in to comment.