Glottochronology (excluded exclusions) extends #1435 (#1446)

* Reduce multi spaces * Refactoring
ispras · Jun 22, 2023 · eefccdd · eefccdd
1 parent 685dcbc
commit eefccdd
Showing 1 changed file with 9 additions and 3 deletions.
diff --git a/lingvodoc/schema/query.py b/lingvodoc/schema/query.py
@@ -13242,13 +13242,18 @@ def compare_translations(swadesh_lex, dictionary_lex):
             def split_lex(lex):
                 # Split by commas and open brackets to separate
                 # various forms of lexeme and extra note if present
-                return set(f" {form}".lower().replace(" заим.", "").strip()
+                lex = ' '.join(lex.lower().split()) # reduce multi spaces
+                if "убрать из стословника" in lex:
+                    return set()
+
+                return set(form.strip()
                            for form in lex.replace('(', ',').split(',')
-                           if form.strip()
-                           and ')' not in form)  # exclude notes
+                           if form.strip() and ')' not in form)  # exclude notes
+
             # return true if the intersection is not empty
             return bool(split_lex(swadesh_lex) & split_lex(dictionary_lex))
 
+
         # Gathering entry grouping data.
 
         if not debug_flag:
@@ -13294,6 +13299,7 @@ def split_lex(lex):
                 with gzip.open(tag_data_file_name, 'wb') as tag_data_file:
                     pickle.dump((r1, group_list, r3), tag_data_file)
 
+
         # Getting text data for each perspective.
         # entries_set gathers entry_id(s) of words met in Swadesh' list
         # swadesh_total gathers numbers of words within Swadesh' list