From 4066a03073acebdd5226cd30096d9e7195dc1eac Mon Sep 17 00:00:00 2001 From: mjuhanne Date: Sun, 3 Sep 2023 18:24:11 +0300 Subject: [PATCH] Search engine: Prioritize high frequency (and higher Kanken grade) kanjis. --- addon/search_engine.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/addon/search_engine.py b/addon/search_engine.py index 14eb12b..a64b20a 100644 --- a/addon/search_engine.py +++ b/addon/search_engine.py @@ -21,10 +21,10 @@ def csv_to_list(csv): ("kunyomi", j2c, None), ("nanori", j2c, None), ("meanings", j2c, None), - #("frequency_rank", _, None), + ("frequency_rank", _, None), #("grade", _, None), #("jlpt", _, None), - #("kanken", _, None), + ("kanken", _, None), ("primitives", _, None), ("primitive_of", _, None), ("primitive_keywords", j2c, None), @@ -92,6 +92,7 @@ def __init__(self, db_cursor): self.meaning_cache = dict() self.stories_cache = dict() self.primitive_alternative_cache = dict() + self.frequency_points = dict() self.init_cache() @@ -240,6 +241,17 @@ def update_cache(self, character=None): self.stories_cache[c] = st + points = 0 + if d['frequency_rank'] is not None and d['frequency_rank'] != '': + fr_points = (4000 - d['frequency_rank'])/400 + if fr_points <= 0: + fr_points = 0 + points += fr_points + if d['kanken'] is not None and d['kanken'] != '': + points += 11 - float(d['kanken']) + if points > 0: + self.frequency_points[c] = points + # create a reverse lookup table for primitive alternatives if len(d['primitive_alternatives']) > 0: prim_alt_list = custom_list(d['primitive_alternatives']) @@ -304,6 +316,8 @@ def get_matching_characters_with_scoring(self, search_terms, pool, is_set, pool_ else: kanji_scores[character] = pool_priority kanji_matches[character] = {search_term} + if character in self.frequency_points: + kanji_scores[character] += self.frequency_points[character] def get_matching_characters_from_list_of_pools(self, search_terms, pool_list, max_results):